Description

This script shows a workaround for making a species bar plot when the Kraken report for one or both of your samples contains too many classified species to plot legibly.

#Jessica Kaufman
#February 14, 2022
#Make bar plot of only top 10 species for each sample
#Read one tab-separated, header-less Kraken2 report into a data frame.
#Columns are read as character (V1) then numeric (V2); fill = TRUE pads
#short rows instead of failing.
#quote = "" and comment.char = "" switch off quote and comment parsing, so a
#taxon name containing an apostrophe, a double quote or '#' cannot swallow
#or truncate the rows that follow it.
readKrakenReport <- function(path) {
  read.table(path,
             sep = '\t', fill = TRUE,
             quote = "", comment.char = "",
             colClasses = c('character', 'numeric'))
}
allTax1 <- readKrakenReport('Galaxy39-[Report__Kraken2_on_data_33].tabular')
allTax2 <- readKrakenReport('Galaxy41-[Report__Kraken2_on_data_36].tabular')

#Keep only the species-level rows of each report: those whose taxonomy
#string (column V1) contains a "|s" rank marker
isSpecies1 <- grepl("\\|s", allTax1$V1)
isSpecies2 <- grepl("\\|s", allTax2$V1)
sRows1 <- allTax1[isSpecies1, ]
sRows2 <- allTax2[isSpecies2, ]

#Clean up species names by removing everything from the d__ prefix through
#the s__ prefix, leaving only the species name.
#gsub() is vectorised, so each column is cleaned in a single call. This also
#avoids looping over 1:nrow(), which iterates over c(1, 0) and adds a stray
#NA row when a sample has no species rows.
sRows1[[1]] <- gsub("d__.*s__", "", sRows1[[1]], perl = TRUE)
sRows2[[1]] <- gsub("d__.*s__", "", sRows2[[1]], perl = TRUE)
names(sRows1) <- c('Species', 'Count')
names(sRows2) <- c('Species', 'Count')

#Load dplyr package (make sure to install first)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Use dplyr to keep only the 10 highest-count species for each sample.
#slice_max() replaces the superseded top_n() and names the ranking column
#explicitly instead of silently picking the last column.
#Rows come back sorted by descending Count. Ties are kept (the default), so
#more than 10 rows can be returned when several species share the
#10th-highest count; add with_ties = FALSE to cap at exactly 10.
topChild <- sRows1 %>% slice_max(Count, n = 10)
topAdult <- sRows2 %>% slice_max(Count, n = 10)
#Convert raw counts into proportions of each sample's top-species total
#(the normalisation steps from the assignment)
#Sample 1
totalSample1 <- sum(topChild[["Count"]])
topChild[["Count"]] <- topChild[["Count"]] / totalSample1
#Sample 2
totalSample2 <- sum(topAdult[["Count"]])
topAdult[["Count"]] <- topAdult[["Count"]] / totalSample2

#Create a single tidy (long format) dataframe: one row per species per
#sample, with the sample label in `variable` and the proportion in `value`.
#Each sample's rows are built with data.frame() rather than cbind(). cbind()
#produces a character matrix, which turned the proportions into strings that
#then had to be parsed back with as.numeric() (losing precision, and
#returning factor codes on R < 4.0).
ChildDF <- data.frame(Species = topChild$Species,
                      variable = rep("Child", nrow(topChild)),
                      value = topChild$Count,
                      stringsAsFactors = FALSE)
AdultDF <- data.frame(Species = topAdult$Species,
                      variable = rep("Adult", nrow(topAdult)),
                      value = topAdult$Count,
                      stringsAsFactors = FALSE)
myData <- rbind(ChildDF, AdultDF)
#Stacked bar per sample, one coloured segment per species
library(ggplot2)
ggplot(myData, aes(x = variable, y = value, fill = Species)) +
  geom_bar(stat = "identity") +
  theme(legend.text = element_text(colour="red", size = 5))