Details

This is an R Markdown document comparing two transcriptome assembly protocols, ABySS and Trinity. The graphs show the sequential changes made to each assembly; the data were generated using the GAEMR script basic_assembly_stats.py.

The RNA-Seq data are from C. odorata, sequenced on a HiSeq 3000 (PE-100).

library(ggplot2)
library(gridExtra)
# Load the per-stage assembly statistics table.
cedrela <- read.csv(
  "~/Documents/Cedrela/NCGR Internship files/Assembly_Stats/T_assembly_stats.2.csv"
)

# Sort the rows by stage name, then freeze that row order as the factor
# level order so the bars are drawn in the same sequence as the rows.
nounitigs <- cedrela[order(cedrela$Name), ]
nounitigs$Name <- factor(
  nounitigs$Name,
  levels = as.character(nounitigs$Name)
)

# Print the ordered stage names; these are the keys used in the x-axis
# label map defined below. The `##` lines are the echoed console output.
list(nounitigs$Name)
## [[1]]
##  [1] I-ABYSS.CdHit                J-ABYSS.Cap3                
##  [3] K-ABYSS.Sc                   O-GapCloser.100-3           
##  [5] P-abyss-No_gaps_CdHit        Q-Abyss-Final-Filtered      
##  [7] T-Trinity.Pooled             U-Trinity.Cap3              
##  [9] V-Trinity-AbyssScaffold      Z-Trinity-Sc-GapCloser-100-3
## [11] ZA-Trinity-No_gaps_CdHit     ZB-Trinity-Final_Filtered   
## 12 Levels: I-ABYSS.CdHit J-ABYSS.Cap3 K-ABYSS.Sc ... ZB-Trinity-Final_Filtered
# Shared x-axis scale: no axis title, and each stage name is replaced by
# the assembler it belongs to (the last stage of each gets a "Final" tag).
xlabels <- scale_x_discrete(
  name = "",
  labels = c(
    # ABySS stages
    "I-ABYSS.CdHit" = "ABySS",
    "J-ABYSS.Cap3" = "ABySS",
    "K-ABYSS.Sc" = "ABySS",
    "O-GapCloser.100-3" = "ABySS",
    "P-abyss-No_gaps_CdHit" = "ABySS",
    "Q-Abyss-Final-Filtered" = "ABySS Final",
    # Trinity stages
    "T-Trinity.Pooled" = "TRINITY",
    "U-Trinity.Cap3" = "TRINITY",
    "V-Trinity-AbyssScaffold" = "TRINITY",
    "Z-Trinity-Sc-GapCloser-100-3" = "TRINITY",
    "ZA-Trinity-No_gaps_CdHit" = "TRINITY",
    "ZB-Trinity-Final_Filtered" = "TRINITY Final"
  )
)
# Print the Method column to see which method produced each stage and the
# set of distinct method levels. The `##` lines are the echoed console output.
list(nounitigs$Method)
## [[1]]
##  [1] ABySS     Cap3      ABYSS-Sc  GapCloser Cd.Hit    Filtered  Trinity  
##  [8] Cap3      ABYSS-Sc  GapCloser Cd.Hit    Filtered 
## Levels: ABySS ABYSS-Sc Cap3 Cd.Hit Filtered GapCloser Trinity
# Fill colours for the Method legend. The two assemblers get saturated
# colours; the post-processing steps get shades of grey.
# Each colour is named with its Method level so that scale_fill_manual()
# matches by name. An unnamed palette is applied by position, which depends
# on how the levels happen to sort (e.g. "ABySS" vs "ABYSS-Sc" order differs
# between locales) and could silently swap colours.
methodPalette <- c(
  "ABySS" = "#0072B2",
  "ABYSS-Sc" = "grey80",
  "Cap3" = "grey90",
  "Cd.Hit" = "grey60",
  "Filtered" = "grey50",
  "GapCloser" = "grey70",
  "Trinity" = "#D55E00"
)

# Bar chart of MaxScaffold (longest sequence) per assembly stage, filled by
# method. The legend is hidden on this panel.
g1 <- ggplot(data = nounitigs, aes(x = Name, y = MaxScaffold, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "bp") +
  xlabels +
  labs(title = "Length of Longest Seq.") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    # Rotate the stage labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Vertical grid lines add nothing on a discrete axis.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Bar chart of ScaffoldN50 per assembly stage, filled by method. This panel
# carries the legend, placed underneath the plot.
g2 <- ggplot(nounitigs, aes(x = Name, y = ScaffoldN50, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "N50") +
  xlabels +
  labs(title = "N50") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 8),
    # Rotate the stage labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Vertical grid lines add nothing on a discrete axis.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Bar chart of TotalScaffoldLength per assembly stage, filled by method.
# The legend is hidden on this panel.
# NOTE(review): the axis is labelled "Mbp"; confirm the CSV column is
# actually stored in Mbp rather than bp.
g3 <- ggplot(nounitigs, aes(x = Name, y = TotalScaffoldLength, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "Mbp") +
  xlabels +
  labs(title = "Total Length") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    # Rotate the stage labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Vertical grid lines add nothing on a discrete axis.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Bar chart of TotalGapLength per assembly stage, filled by method, with the
# legend placed underneath the plot and no background behind the legend keys.
# NOTE(review): the axis is labelled "Mbp"; confirm the CSV column is
# actually stored in Mbp rather than bp.
g4 <- ggplot(data = nounitigs, aes(x = Name, y = TotalGapLength, fill = Method)) +
  theme_bw() +
  geom_bar(stat = "identity") +
  ggtitle("Total Gap Length") +
  scale_y_continuous(name = "Mbp") +
  # The legend title is an argument of the scale. It was previously written
  # inside the labels vector, where it became an extra label instead of the
  # title. The hand-written labels were also stale: they omitted "ABySS" and
  # did not follow the order of the seven Method levels, so they could attach
  # to the wrong colours. The default labels (the level names) are used
  # instead, which also keeps this legend consistent with the N50 panel.
  scale_fill_manual(name = "Method", values = methodPalette) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 8),
    legend.key = element_blank()
  ) +
  xlabels

# Bar chart of the Scaffolds column (number of sequences) per assembly
# stage, filled by method. The legend is hidden on this panel.
g5 <- ggplot(nounitigs, aes(x = Name, y = Scaffolds, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "Count") +
  xlabels +
  labs(title = "Number of Sequences") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    # Rotate the stage labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Vertical grid lines add nothing on a discrete axis.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Bar chart of AssemblyGC per assembly stage, filled by method. The legend
# is hidden on this panel.
g6 <- ggplot(nounitigs, aes(x = Name, y = AssemblyGC, fill = Method)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = methodPalette) +
  scale_y_continuous(name = "% GC") +
  xlabels +
  labs(title = "GC Content") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 8),
    # Rotate the stage labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    # Vertical grid lines add nothing on a discrete axis.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Lay out four of the panels in a two-column grid, filled row by row:
# longest sequence, sequence count, total length, N50.
grid.arrange(grobs = list(g1, g5, g3, g2), ncol = 2)