#Read in files from Galaxy

allTax1 <- read.table('Galaxy27-[Report_ Kraken2 on 4yo].tabular',
                      sep='\t',fill = TRUE, 
                      colClasses=c('character','numeric'))
allTax2 <- read.table('Galaxy29-[Report_ Kraken2 on 39yo].tabular',
                      sep='\t',fill = TRUE, 
                      colClasses=c('character','numeric'))

#Create dataframes with just species rows and without s

sRows1 <- allTax1[grepl("\\|s", allTax1[["V1"]]),]
restTax1 <- allTax1[!grepl("\\|s", allTax1[["V1"]]),]
sRows2 <- allTax2[grepl("\\|s", allTax2[["V1"]]),]
restTax2 <- allTax2[!grepl("\\|s", allTax2[["V1"]]),]

#Format columns, clean taxonomic names, and write species data to files

sRows1$V1 <- gsub("d__.*s__","",sRows1$V1)
sRows2$V1 <- gsub("d__.*s__","",sRows2$V1)
names(sRows1)<-c('Species','Count')
names(sRows2)<-c('Species','Count')
write.csv(sRows1,'ChildSpecies.csv',row.names=FALSE)
write.csv(sRows2,'AdultSpecies.csv',row.names=FALSE)

#Create a tidy dataframe with counts from both samples #Make other row for taxa with low count #This example only names top 10 species in legend

sRows1 <- arrange(sRows1,-Count)
otherRow1 <- data.frame(Species=character(),
                        Count=numeric())
otherRow1[1,1] <-'Other'
otherRow1[1,2] <- sum(sRows1[11:nrow(sRows1),2])
topSpecies1 <- bind_rows(sRows1[1:10,],otherRow1)

sRows2 <- arrange(sRows2,-Count)
otherRow2 <- data.frame(Species=character(),
                        Count=numeric())
otherRow2[1,1] <-'Other'
otherRow2[1,2] <- sum(sRows2[11:nrow(sRows2),2])
topSpecies2 <- bind_rows(sRows2[1:10,],otherRow1)

#Change the data from counts to percentages in both dataframes
totalSample1 <- sum(topSpecies1$Count)
totalSample2 <- sum(topSpecies2$Count)  
topSpecies1$Count <- topSpecies1$Count/totalSample1
topSpecies2$Count <- topSpecies2$Count/totalSample2

#Tidy
Sample1<-data.frame(Species = topSpecies1$Species, 
                    Sample="Child", 
                    ReadProportion=topSpecies1$Count)
Sample2<-data.frame(Species = topSpecies2$Species, 
                    Sample="Adult", 
                    ReadProportion=topSpecies2$Count) 
myData <- bind_rows(Sample1,Sample2)

#Plot taxonomic bar plot for species

p<-ggplot(myData, aes(x = Sample, y = ReadProportion, fill = Species)) + 
  geom_bar(stat = "identity") + 
  theme(legend.text = element_text(colour="red", size = 6))
p  #If using markdown, typing p on console opens plot in plots window for download