Details

This is an R Markdown document comparing two chloroplast assembly protocols, ABySS and SPAdes. The graphs show sequential changes to the assembly; the data were generated using the GAEMR script basic_assembly_stats.py.

DNASeq data from C. odorata from HiSeq 3000 PE-100.

library(ggplot2)
library(gridExtra)
# Read the assembly statistics table. file.path() rebuilds the same
# "~/Documents/..." path as before while keeping the line length manageable.
chloro <- read.csv(
  file.path(
    "~", "Documents", "Cedrela", "NCGR Internship files",
    "Assembly_Stats", "Cp_Assembly_Stats.2.csv"
  )
)

# Sort the rows by Name, then freeze that order as the factor levels so the
# x axis of every plot follows the sorted row order. unique() keeps factor()
# from failing with "factor level is duplicated" if a Name ever repeats.
chloro <- chloro[order(chloro$Name), ]
chloro$Name <- factor(chloro$Name, levels = unique(as.character(chloro$Name)))

# Print the ordered Name levels; these are the keys relabelled below.
levels(chloro$Name)
## [1] "J-Pooled_unitigs" "K-Cap3"           "L-Abyss-Sc"      
## [4] "M-CdHit"          "N-Filtered"       "N-NoGaps"        
## [7] "P-trimmed"
# Shared discrete x scale added to every plot: no axis title, and each Name
# level is displayed as the label given here.
xlabels <- scale_x_discrete(
  name = "",
  labels = c(
    "J-Pooled_unitigs" = "ABySS",
    "K-Cap3" = "ABySS",
    "L-Abyss-Sc" = "ABySS",
    "M-CdHit" = "ABySS",
    "N-Filtered" = "ABySS",
    "N-NoGaps" = "ABySS",
    "P-trimmed" = "SPAdes"
  )
)

# Print the Method column; its values are the keys of the fill palette below.
list(chloro$Method)
## [[1]]
## [1] ABySS     Cap3      Abyss-Sc  CdHit     Filtered  GapCloser SPAdes   
## Levels: ABySS Abyss-Sc Cap3 CdHit Filtered GapCloser SPAdes
# Fill colours keyed by Method value. Naming the entries makes
# scale_fill_manual() match colours to methods by name rather than by the
# sort order of the levels, which can vary with locale or column type.
# The colour assigned to each method is the same as with the listed levels.
methodPalette <- c(
  "ABySS" = "#0072B2",
  "Abyss-Sc" = "grey80",
  "Cap3" = "grey90",
  "CdHit" = "grey60",
  "Filtered" = "grey50",
  "GapCloser" = "grey70",
  "SPAdes" = "#D55E00"
)

# Bar chart of MaxScaffold for each Name, filled by Method; legend hidden.
g1 <- ggplot(
  data = chloro,
  mapping = aes(x = Name, y = MaxScaffold, fill = Method)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "bp") +
  xlabels +
  labs(title = "Length of Longest Seq.") +
  theme_bw() +
  theme(
    # Vertical x tick labels, no vertical grid lines.
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )

# Bar chart of ScaffoldN50 for each Name, filled by Method, with the fill
# legend shown at the bottom. The fill scale is declared exactly once: adding
# a second scale_fill_manual() only replaces the first and makes ggplot2 emit
# "Scale for 'fill' is already present".
g2 <- ggplot(data = chloro, aes(x = Name, y = ScaffoldN50, fill = Method)) +
  theme_bw() +
  geom_bar(stat = "identity") +
  ggtitle("N50") +
  scale_y_continuous(name = "N50") +
  # breaks fixes the legend order; labels shows "Abyss-Sc" as "ABYSS-Sc".
  scale_fill_manual(
    values = methodPalette,
    name = "Method",
    breaks = c(
      "ABySS", "Abyss-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    ),
    labels = c(
      "ABySS", "ABYSS-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    )
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 10),
    legend.key = element_blank()
  ) +
  xlabels
# Bar chart of TotalScaffoldLength for each Name, filled by Method; legend
# hidden.
g3 <- ggplot(
  data = chloro,
  mapping = aes(x = Name, y = TotalScaffoldLength, fill = Method)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "bp") +
  xlabels +
  labs(title = "Total Length") +
  theme_bw() +
  theme(
    # Vertical x tick labels, no vertical grid lines.
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )

# Bar chart of TotalGapLength for each Name, filled by Method, with the fill
# legend shown at the bottom.
g4 <- ggplot(
  data = chloro,
  mapping = aes(x = Name, y = TotalGapLength, fill = Method)
) +
  geom_bar(stat = "identity") +
  # breaks fixes the legend order; labels shows "Abyss-Sc" as "ABYSS-Sc".
  scale_fill_manual(
    values = methodPalette,
    name = "Method",
    breaks = c(
      "ABySS", "Abyss-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    ),
    labels = c(
      "ABySS", "ABYSS-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    )
  ) +
  scale_y_continuous(name = "bp") +
  xlabels +
  labs(title = "Total Gap Length") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "bottom",
    legend.key = element_blank()
  )

# Bar chart of Scaffolds (sequence count) for each Name, filled by Method;
# legend hidden.
g5 <- ggplot(
  data = chloro,
  mapping = aes(x = Name, y = Scaffolds, fill = Method)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "Count") +
  xlabels +
  labs(title = "Number of Sequences") +
  theme_bw() +
  theme(
    # Vertical x tick labels, no vertical grid lines.
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )

# Bar chart of AssemblyGC for each Name, filled by Method; legend hidden.
g6 <- ggplot(
  data = chloro,
  mapping = aes(x = Name, y = AssemblyGC, fill = Method)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "% GC") +
  xlabels +
  labs(title = "GC Content") +
  theme_bw() +
  theme(
    # Vertical x tick labels, no vertical grid lines.
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )

# Draw four of the panels on one page in a two-column grid, filled row by row:
# longest sequence, number of sequences, total length, N50.
grid.arrange(grobs = list(g1, g5, g3, g2), ncol = 2)