STEP 1: Read in .tabular files from Galaxy Report

Filter to species rows and clean up names

ALTERNATIVELY, YOU CAN READ IN YOUR FILES FROM 2.08 INSTEAD

# Keep only the species-level rows of a raw report and label its columns.
#
# report: data frame as returned by read.table() without a header, with the
#         lineage string in V1. Assumed to have exactly two columns, since
#         exactly two column names are assigned below.
# Returns the rows whose V1 matches "d__.*s__", with everything up to and
# including the last "s__" removed from V1, and the columns renamed to
# Species and Count.
extractSpeciesRows <- function(report) {
  isSpecies <- grepl("d__.*s__", report$V1)
  speciesRows <- report[isSpecies, ]
  speciesRows$V1 <- gsub("d__.*s__", "", speciesRows$V1)
  names(speciesRows) <- c("Species", "Count")
  speciesRows
}

# quote = "" stops read.table() from treating apostrophes or double quotes
# inside taxon names as string delimiters; with the default quote set such
# characters can silently merge or drop rows.
NoData <- read.table("NoToothbrush.tabular", sep = "\t", quote = "")
Data <- read.table("Toothbrush.tabular", sep = "\t", quote = "")

sRows1 <- extractSpeciesRows(NoData)
sRows2 <- extractSpeciesRows(Data)

STEP 2: Create a tidy dataframe of the top 20 species by count, plus an 'Other' row

# Number of species kept as individual rows; everything else is pooled.
topN <- 20

# Build the single-row 'Other' data frame for a ranked species table.
#
# ranked: data frame with columns Species and Count, already sorted by
#         decreasing Count.
# n:      number of leading rows that are NOT pooled.
# Returns a one-row data frame (Species = 'Other') whose Count is the sum of
# the counts in every row after the first n; 0 when there are no such rows.
otherRowBeyond <- function(ranked, n) {
  # A logical mask avoids the 21:nrow() trap, which counts backwards (and so
  # indexes a non-existent row) when the table has n or fewer rows.
  beyondTop <- seq_len(nrow(ranked)) > n
  data.frame(
    Species = "Other",
    # as.numeric() keeps the column double and avoids integer overflow.
    Count = sum(as.numeric(ranked$Count[beyondTop])),
    stringsAsFactors = FALSE
  )
}

# Sample 1: rank species, pool the tail, keep the top rows plus 'Other'.
sRows1 <- arrange(sRows1, desc(Count))
otherRow1 <- otherRowBeyond(sRows1, topN)
# head() returns fewer rows instead of NA padding when species are scarce.
topSpecies1 <- bind_rows(head(sRows1, topN), otherRow1)

# Sample 2: same procedure, using this sample's own 'Other' row.
sRows2 <- arrange(sRows2, desc(Count))
otherRow2 <- otherRowBeyond(sRows2, topN)
topSpecies2 <- bind_rows(head(sRows2, topN), otherRow2)

# Rescale the counts in both tables to fractions of the sample total
# (values in [0, 1], not percentages), overwriting the Count column.
totalSample1 <- sum(topSpecies1[["Count"]])
totalSample2 <- sum(topSpecies2[["Count"]])
topSpecies1[["Count"]] <- topSpecies1[["Count"]] / totalSample1
topSpecies2[["Count"]] <- topSpecies2[["Count"]] / totalSample2

# Long ("tidy") layout: one row per species per sample, with a Sample label
# column so both samples can be stacked into a single data frame.
Sample1 <- with(
  topSpecies1,
  data.frame(Species = Species, Sample = "Sample1", ReadProportion = Count)
)
Sample2 <- with(
  topSpecies2,
  data.frame(Species = Species, Sample = "Sample2", ReadProportion = Count)
)
myData <- bind_rows(Sample1, Sample2)

STEP 3: Plot

# Stacked bar chart: one bar per sample, segments coloured by species, with
# segment heights taken directly from ReadProportion.
p <- ggplot(
  data = myData,
  mapping = aes(x = Sample, y = ReadProportion, fill = Species)
)
p <- p + geom_bar(stat = "identity")
# Small red legend text.
p <- p + theme(legend.text = element_text(colour = "red", size = 6))
# Evaluating p displays the plot; when working in markdown, type p at the
# console to open it in the plots window, where it can be downloaded.
p