Transrate assembly and contig plots.

Assemblies of mouse with 10, 20, 50 and 100 million reads.

Assembly-level stats

Load data

# Load the assembly-level metrics table and reshape it to long format
# (one row per assembly/metric pair) for faceted plotting.
setwd("~/Dropbox/ongoing_projects/feeding_transcriptome/final")
library(reshape2)
astats <- read.csv("all_10M_assemblies.csv")

# num_pairs is set to the same constant for every row; the two mapping
# counts are then expressed as a percentage of it.
astats$num_pairs <- 1e+07
astats$percent_mapping <- astats[["total_mappings"]] / astats[["num_pairs"]] * 100
astats$pc_good_mapping <- astats[["good_mappings"]] / astats[["num_pairs"]] * 100

# Strip the Trinity output suffix from the assembly names.
astats$assembly <- gsub(
  pattern = "\\.corr\\.Trinity\\.fasta",
  replacement = "",
  x = astats$assembly
)

# Long format: `assembly` is the id column, every other column becomes a
# (variable, value) pair. `id.vars` is spelled out in full rather than
# relying on partial argument matching of `id`.
astats <- melt(astats, id.vars = "assembly")
astats <- astats[complete.cases(astats), ]

# Order assemblies explicitly so they do not sort alphabetically.
# NOTE(review): any assembly name not listed in `levels` becomes NA here --
# confirm the stripped names are exactly these four.
astats$assembly <- factor(
  x = astats$assembly,
  levels = c("10M", "20M", "50M", "100M"),
  ordered = TRUE
)

Plot the metrics

library(ggplot2)
# Bar chart of every assembly-level metric: one facet row per metric (each
# with its own free scale), one bar per assembly. Facet strip labels are
# rotated to read horizontally.
ggplot(
  data = astats,
  mapping = aes(x = assembly, y = value, fill = assembly)
) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(variable ~ ., scales = "free") +
  theme(strip.text.y = element_text(angle = 0))

plot of chunk unnamed-chunk-2

Contig-level stats

Load data

library(reshape2)
setwd("~/Dropbox/ongoing_projects/feeding_transcriptome/final")

# Read the per-contig metrics file of each assembly, tag every row with the
# assembly's read count (in millions) and stack the tables into one data
# frame. All files are read first and bound once, instead of repeating the
# read/tag/rbind stanza per file and growing `cstats` step by step.
read_depths <- c(10, 20, 50, 100)
cstats <- do.call(rbind, lapply(read_depths, function(depth) {
  contigs <- read.csv(
    paste0("all_10M_", depth, "M.corr.Trinity.fasta_contigs.csv")
  )
  contigs$millionreads <- depth
  contigs
}))

Plot the metrics

Contig stats

Length

library(ggplot2)
# Density of contig length, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = length, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-4

GC

# Density of the prop_gc column, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = prop_gc, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-5

GC skew

# Density of the gc_skew column, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = gc_skew, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-6

AT skew

# Density of the at_skew column, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = at_skew, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-7

CpG count

# Density of the cpg_count column, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = cpg_count, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-8

CpG ratio

# Density of the cpg_ratio column, one curve per value of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = cpg_ratio, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-9

ORF length

# Density of the orf_length column on a log10 x axis, one curve per value
# of millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = orf_length, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()

plot of chunk unnamed-chunk-10

Linguistic complexity (k=6)

# Density of the linguistic_complexity_6 column, one curve per value of
# millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = linguistic_complexity_6, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")

plot of chunk unnamed-chunk-11

Read metrics

Uncovered bases

# Density of the uncovered_bases column on a log10 x axis, one curve per
# value of millionreads. Rows that are non-finite after the transform are
# dropped by stat_density, as the warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = uncovered_bases, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()
## Warning: Removed 27451 rows containing non-finite values (stat_density).
## Warning: Removed 1409 rows containing non-finite values (stat_density).
## Warning: Removed 802 rows containing non-finite values (stat_density).
## Warning: Removed 551 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-12

Mean coverage

# Density of the mean_coverage column on a log10 x axis, one curve per
# value of millionreads. Rows that are non-finite after the transform are
# dropped by stat_density, as the warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = mean_coverage, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()
## Warning: Removed 439 rows containing non-finite values (stat_density).
## Warning: Removed 27998 rows containing non-finite values (stat_density).
## Warning: Removed 68310 rows containing non-finite values (stat_density).
## Warning: Removed 115586 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-13

# Violin plot of mean_coverage for each value of millionreads, on a log10
# y axis. No colour aesthetic is mapped in this plot, so a colour label
# would have no effect; the "Reads" label is applied to the x axis instead
# of leaving it as the raw expression `factor(millionreads)`.
ggplot(
  data = cstats,
  mapping = aes(x = factor(millionreads), y = mean_coverage)
) +
  geom_violin() +
  labs(x = "Reads (x10^6)") +
  scale_y_log10()
## Warning: Removed 212333 rows containing non-finite values (stat_ydensity).

plot of chunk unnamed-chunk-13

In bridges

# Density of the in_bridges column on a log10 x axis, one curve per value
# of millionreads. Rows that are non-finite after the transform are dropped
# by stat_density, as the warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = in_bridges, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()
## Warning: Removed 32697 rows containing non-finite values (stat_density).
## Warning: Removed 65166 rows containing non-finite values (stat_density).
## Warning: Removed 125883 rows containing non-finite values (stat_density).
## Warning: Removed 190412 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-14

In bridges

# Density of the in_bridges column on a log10 x axis, one curve per value
# of millionreads.
# NOTE(review): this call is identical to the "In bridges" plot in the
# preceding chunk -- confirm whether a different metric was intended here.
ggplot(
  data = cstats,
  mapping = aes(x = in_bridges, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()
## Warning: Removed 32697 rows containing non-finite values (stat_density).
## Warning: Removed 65166 rows containing non-finite values (stat_density).
## Warning: Removed 125883 rows containing non-finite values (stat_density).
## Warning: Removed 190412 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-15

Edit distance per base

# Density of the edit_distance_per_base column, one curve per value of
# millionreads. Non-finite rows are dropped by stat_density, as the
# warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = edit_distance_per_base, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")
## Warning: Removed 18 rows containing non-finite values (stat_density).
## Warning: Removed 14927 rows containing non-finite values (stat_density).
## Warning: Removed 47017 rows containing non-finite values (stat_density).
## Warning: Removed 89856 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-16

Low uniqueness bases

# Density of the low_uniqueness_bases column on a log10 x axis, one curve
# per value of millionreads. Rows that are non-finite after the transform
# are dropped by stat_density, as the warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = low_uniqueness_bases, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)") +
  scale_x_log10()
## Warning: Removed 25827 rows containing non-finite values (stat_density).
## Warning: Removed 1149 rows containing non-finite values (stat_density).
## Warning: Removed 654 rows containing non-finite values (stat_density).
## Warning: Removed 468 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-17

Low uniqueness bases (proportion)

# Density of the p_low_uniqueness_bases column, one curve per value of
# millionreads.
ggplot(
  data = cstats,
  mapping = aes(x = p_low_uniqueness_bases, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")
## Warning: Removed 6 rows containing non-finite values (stat_density).
## Warning: Removed 12 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-18

Reference-based metrics

Reference coverage

# Density of the reference_coverage column, one curve per value of
# millionreads. Non-finite rows are dropped by stat_density, as the
# warnings printed below the call report.
ggplot(
  data = cstats,
  mapping = aes(x = reference_coverage, colour = factor(millionreads))
) +
  geom_density() +
  labs(colour = "Reads (x10^6)")
## Warning: Removed 48702 rows containing non-finite values (stat_density).
## Warning: Removed 78614 rows containing non-finite values (stat_density).
## Warning: Removed 142959 rows containing non-finite values (stat_density).
## Warning: Removed 209945 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-19