# Mouse assemblies built from 10, 20, 50 and 100 million reads.
# Per-assembly summary statistics: derive mapping percentages, reshape the
# table to long form and draw one bar-chart facet per metric.
setwd("~/Dropbox/ongoing_projects/feeding_transcriptome/final")
astats <- read.csv("all_10M_assemblies.csv")

# NOTE(review): the same read-pair count is applied to every assembly --
# presumably the mapped reads are the shared "all_10M" set; confirm.
astats$num_pairs <- 1e+07
astats$percent_mapping <- with(astats, total_mappings / num_pairs * 100)
astats$pc_good_mapping <- with(astats, good_mappings / num_pairs * 100)

# Strip the Trinity output suffix from the assembly names (presumably leaving
# labels such as "10M" that match the factor levels used below).
astats$assembly <- gsub("\\.corr\\.Trinity\\.fasta", "", astats$assembly)

library(reshape2)
# Long format: `assembly` is the only id column, so every other column
# (including the derived ones above) becomes a level of `variable`.
astats <- melt(astats, id.vars = "assembly")
astats <- astats[complete.cases(astats), ]

# Order the assemblies by read depth instead of alphabetically. Any label not
# listed in `levels` becomes NA.
astats$assembly <- factor(
  x = astats$assembly,
  levels = c("10M", "20M", "50M", "100M"),
  ordered = TRUE
)

library(ggplot2)
# One facet row per metric; free scales because the metrics have unrelated
# units and ranges.
ggplot(astats, aes(x = assembly, y = value, fill = assembly)) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_grid(variable ~ ., scales = "free") +
  theme(strip.text.y = element_text(angle = 0))
library(reshape2)
setwd("~/Dropbox/ongoing_projects/feeding_transcriptome/final")

# Read one per-contig metrics table and record on every row the read depth
# (in millions) of the assembly it belongs to.
load_contig_stats <- function(csv_file, million_reads) {
  contigs <- read.csv(csv_file)
  contigs$millionreads <- million_reads
  contigs
}

cstats <- load_contig_stats("all_10M_10M.corr.Trinity.fasta_contigs.csv", 10)
cstats20 <- load_contig_stats("all_10M_20M.corr.Trinity.fasta_contigs.csv", 20)
cstats50 <- load_contig_stats("all_10M_50M.corr.Trinity.fasta_contigs.csv", 50)
cstats100 <- load_contig_stats("all_10M_100M.corr.Trinity.fasta_contigs.csv", 100)

# Stack the four tables, in increasing read depth, into one data frame.
cstats <- rbind(cstats, cstats20, cstats50, cstats100)
# Plot the distribution of each contig metric, coloured by read depth.
library(ggplot2)

# Density plot of one contig metric. `mapping` is the complete aes() for the
# plot; `log_x = TRUE` appends a log10 x scale after the other components.
density_by_depth <- function(data, mapping, log_x = FALSE) {
  p <- ggplot(data, mapping) + geom_density() + labs(colour = "Reads (x10^6)")
  if (log_x) {
    p <- p + scale_x_log10()
  }
  p
}

# Each call returns the plot visibly, so it is printed at top level.
density_by_depth(cstats, aes(x = length, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = prop_gc, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = gc_skew, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = at_skew, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = cpg_count, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = cpg_ratio, colour = factor(millionreads)))
density_by_depth(cstats, aes(x = orf_length, colour = factor(millionreads)),
  log_x = TRUE)
density_by_depth(cstats, aes(x = linguistic_complexity_6,
  colour = factor(millionreads)))

# The recorded warnings below follow log-scale plots; the dropped rows are
# presumably zeros (log10(0) is -Inf) and/or missing values.
density_by_depth(cstats, aes(x = uncovered_bases, colour = factor(millionreads)),
  log_x = TRUE)
## Warning: Removed 27451 rows containing non-finite values (stat_density).
## Warning: Removed 1409 rows containing non-finite values (stat_density).
## Warning: Removed 802 rows containing non-finite values (stat_density).
## Warning: Removed 551 rows containing non-finite values (stat_density).
density_by_depth(cstats, aes(x = mean_coverage, colour = factor(millionreads)),
  log_x = TRUE)
## Warning: Removed 439 rows containing non-finite values (stat_density).
## Warning: Removed 27998 rows containing non-finite values (stat_density).
## Warning: Removed 68310 rows containing non-finite values (stat_density).
## Warning: Removed 115586 rows containing non-finite values (stat_density).
# Violin plot of the contigs' mean_coverage at each read depth, on a log10
# y axis. Read depth is mapped to x here and no colour aesthetic is used, so
# the "Reads (x10^6)" title belongs on the x axis rather than on a colour
# legend (which this plot does not have).
ggplot(cstats, aes(y = mean_coverage, x = factor(millionreads))) +
  geom_violin() +
  labs(x = "Reads (x10^6)") +
  scale_y_log10()
## Warning: Removed 212333 rows containing non-finite values (stat_ydensity).
# Components shared by every density plot in this section: the density layer
# and the colour-legend title. Adding the list to a plot adds its elements in
# order.
depth_density <- list(geom_density(), labs(colour = "Reads (x10^6)"))

ggplot(cstats, aes(x = in_bridges, colour = factor(millionreads))) +
  depth_density +
  scale_x_log10()
## Warning: Removed 32697 rows containing non-finite values (stat_density).
## Warning: Removed 65166 rows containing non-finite values (stat_density).
## Warning: Removed 125883 rows containing non-finite values (stat_density).
## Warning: Removed 190412 rows containing non-finite values (stat_density).

# NOTE(review): this repeats the in_bridges plot above verbatim -- possibly a
# different metric was intended; confirm or drop.
ggplot(cstats, aes(x = in_bridges, colour = factor(millionreads))) +
  depth_density +
  scale_x_log10()
## Warning: Removed 32697 rows containing non-finite values (stat_density).
## Warning: Removed 65166 rows containing non-finite values (stat_density).
## Warning: Removed 125883 rows containing non-finite values (stat_density).
## Warning: Removed 190412 rows containing non-finite values (stat_density).

ggplot(cstats, aes(x = edit_distance_per_base, colour = factor(millionreads))) +
  depth_density
## Warning: Removed 18 rows containing non-finite values (stat_density).
## Warning: Removed 14927 rows containing non-finite values (stat_density).
## Warning: Removed 47017 rows containing non-finite values (stat_density).
## Warning: Removed 89856 rows containing non-finite values (stat_density).

ggplot(cstats, aes(x = low_uniqueness_bases, colour = factor(millionreads))) +
  depth_density +
  scale_x_log10()
## Warning: Removed 25827 rows containing non-finite values (stat_density).
## Warning: Removed 1149 rows containing non-finite values (stat_density).
## Warning: Removed 654 rows containing non-finite values (stat_density).
## Warning: Removed 468 rows containing non-finite values (stat_density).

ggplot(cstats, aes(x = p_low_uniqueness_bases, colour = factor(millionreads))) +
  depth_density
## Warning: Removed 6 rows containing non-finite values (stat_density).
## Warning: Removed 12 rows containing non-finite values (stat_density).

ggplot(cstats, aes(x = reference_coverage, colour = factor(millionreads))) +
  depth_density
## Warning: Removed 48702 rows containing non-finite values (stat_density).
## Warning: Removed 78614 rows containing non-finite values (stat_density).
## Warning: Removed 142959 rows containing non-finite values (stat_density).
## Warning: Removed 209945 rows containing non-finite values (stat_density).