#Read in files from Galaxy
allTax1 <- read.table('Galaxy27-[Report_ Kraken2 on 4yo].tabular',
sep='\t',fill = TRUE,
colClasses=c('character','numeric'))
allTax2 <- read.table('Galaxy29-[Report_ Kraken2 on 39yo].tabular',
sep='\t',fill = TRUE,
colClasses=c('character','numeric'))
#Create dataframes with just species rows and without s
sRows1 <- allTax1[grepl("\\|s", allTax1[["V1"]]),]
restTax1 <- allTax1[!grepl("\\|s", allTax1[["V1"]]),]
sRows2 <- allTax2[grepl("\\|s", allTax2[["V1"]]),]
restTax2 <- allTax2[!grepl("\\|s", allTax2[["V1"]]),]
#Format columns, clean taxonomic names, and write species data to files
sRows1$V1 <- gsub("d__.*s__","",sRows1$V1)
sRows2$V1 <- gsub("d__.*s__","",sRows2$V1)
names(sRows1)<-c('Species','Count')
names(sRows2)<-c('Species','Count')
write.csv(sRows1,'ChildSpecies.csv',row.names=FALSE)
write.csv(sRows2,'AdultSpecies.csv',row.names=FALSE)
#Create a tidy dataframe with counts from both samples #Make other row for taxa with low count #This example only names top 10 species in legend
sRows1 <- arrange(sRows1,-Count)
otherRow1 <- data.frame(Species=character(),
Count=numeric())
otherRow1[1,1] <-'Other'
otherRow1[1,2] <- sum(sRows1[11:nrow(sRows1),2])
topSpecies1 <- bind_rows(sRows1[1:10,],otherRow1)
sRows2 <- arrange(sRows2,-Count)
otherRow2 <- data.frame(Species=character(),
Count=numeric())
otherRow2[1,1] <-'Other'
otherRow2[1,2] <- sum(sRows2[11:nrow(sRows2),2])
topSpecies2 <- bind_rows(sRows2[1:10,],otherRow1)
#Change the data from counts to percentages in both dataframes
totalSample1 <- sum(topSpecies1$Count)
totalSample2 <- sum(topSpecies2$Count)
topSpecies1$Count <- topSpecies1$Count/totalSample1
topSpecies2$Count <- topSpecies2$Count/totalSample2
#Tidy
Sample1<-data.frame(Species = topSpecies1$Species,
Sample="Child",
ReadProportion=topSpecies1$Count)
Sample2<-data.frame(Species = topSpecies2$Species,
Sample="Adult",
ReadProportion=topSpecies2$Count)
myData <- bind_rows(Sample1,Sample2)
#Plot taxonomic bar plot for species
p<-ggplot(myData, aes(x = Sample, y = ReadProportion, fill = Species)) +
geom_bar(stat = "identity") +
theme(legend.text = element_text(colour="red", size = 6))
p #If using markdown, typing p on console opens plot in plots window for download