This is an R Markdown document comparing the chloroplast assembly protocols ABySS and SPAdes. The graphs show sequential changes to the assembly; the data were generated using GAEMR basic_assembly_stats.py.
DNA-Seq data are from C. odorata, generated on a HiSeq 3000 (PE-100).
library(ggplot2)
library(gridExtra)
# Read the chloroplast assembly statistics table.
chloro <- read.csv(
  "~/Documents/Cedrela/NCGR Internship files/Assembly_Stats/Cp_Assembly_Stats.2.csv"
)
# Sort the rows by Name, then freeze that row order as the factor levels so
# the discrete x axis of each plot follows the sorted order.
# unique() keeps factor() from failing on duplicated levels should the same
# Name appear in more than one row.
chloro <- chloro[order(chloro$Name), ]
chloro$Name <- factor(chloro$Name, levels = unique(as.character(chloro$Name)))
levels(chloro$Name)
## [1] "J-Pooled_unitigs" "K-Cap3" "L-Abyss-Sc"
## [4] "M-CdHit" "N-Filtered" "N-NoGaps"
## [7] "P-trimmed"
# Shared x-axis scale: no axis title, and each Name level is relabelled with
# the assembler shown for it (the first six levels "ABySS", the last "SPAdes").
xlabels <- scale_x_discrete(
  name = "",
  labels = setNames(
    rep(c("ABySS", "SPAdes"), times = c(6, 1)),
    c(
      "J-Pooled_unitigs", "K-Cap3", "L-Abyss-Sc", "M-CdHit",
      "N-Filtered", "N-NoGaps", "P-trimmed"
    )
  )
)
# Print the Method column, wrapped in a list.
list(chloro[["Method"]])
## [[1]]
## [1] ABySS Cap3 Abyss-Sc CdHit Filtered GapCloser SPAdes
## Levels: ABySS Abyss-Sc Cap3 CdHit Filtered GapCloser SPAdes
# Fill colours for the Method levels. The values are named so that
# scale_fill_manual() matches each colour to its method by name instead of by
# the position of the level; the two assemblers get the accent colours and the
# remaining methods get shades of grey.
methodPalette <- c(
  "ABySS"     = "#0072B2",
  "Abyss-Sc"  = "grey80",
  "Cap3"      = "grey90",
  "CdHit"     = "grey60",
  "Filtered"  = "grey50",
  "GapCloser" = "grey70",
  "SPAdes"    = "#D55E00"
)
# g1: bar chart of MaxScaffold per Name, filled by Method, legend hidden.
g1 <- ggplot(chloro, aes(x = Name, y = MaxScaffold, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "bp") +
  xlabels +
  ggtitle("Length of Longest Seq.") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so they are drawn vertically.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Drop the vertical grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )
# g2: bar chart of ScaffoldN50 per Name, filled by Method, with the Method
# legend placed below the panel.
# Exactly one fill scale is added: a second scale_fill_manual() would replace
# the first and make ggplot2 print a "Scale for 'fill' is already present"
# message.
g2 <- ggplot(data = chloro, aes(x = Name, y = ScaffoldN50, fill = Method)) +
  theme_bw() +
  geom_bar(stat = "identity") +
  ggtitle("N50") +
  scale_fill_manual(
    values = methodPalette, name = "Method",
    breaks = c("ABySS", "Abyss-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"),
    labels = c("ABySS", "ABYSS-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes")
  ) +
  scale_y_continuous(name = "N50") +
  # Rotate the x tick labels so they are drawn vertically.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # Drop the vertical grid lines.
  theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10),
    legend.key = element_blank()
  ) +
  xlabels
# g3: bar chart of TotalScaffoldLength per Name, filled by Method, legend
# hidden.
g3 <- ggplot(chloro, aes(x = Name, y = TotalScaffoldLength, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "bp") +
  xlabels +
  ggtitle("Total Length") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so they are drawn vertically.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Drop the vertical grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )
# g4: bar chart of TotalGapLength per Name, filled by Method, with the Method
# legend placed below the panel.
g4 <- ggplot(chloro, aes(x = Name, y = TotalGapLength, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    name = "Method",
    values = methodPalette,
    breaks = c(
      "ABySS", "Abyss-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    ),
    labels = c(
      "ABySS", "ABYSS-Sc", "Cap3", "CdHit", "Filtered", "GapCloser", "SPAdes"
    )
  ) +
  scale_y_continuous(name = "bp") +
  xlabels +
  ggtitle("Total Gap Length") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so they are drawn vertically.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Drop the vertical grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "bottom",
    legend.key = element_blank()
  )
# g5: bar chart of Scaffolds (number of sequences) per Name, filled by Method,
# legend hidden.
g5 <- ggplot(chloro, aes(x = Name, y = Scaffolds, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "Count") +
  xlabels +
  ggtitle("Number of Sequences") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so they are drawn vertically.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Drop the vertical grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )
# g6: bar chart of AssemblyGC per Name, filled by Method, legend hidden.
g6 <- ggplot(chloro, aes(x = Name, y = AssemblyGC, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "% GC") +
  xlabels +
  ggtitle("GC Content") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so they are drawn vertically.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Drop the vertical grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "none"
  )
# Draw four of the plots (g1, g5, g3, g2, in that order) on a two-column grid.
grid.arrange(grobs = list(g1, g5, g3, g2), ncol = 2)