R Markdown

This script shows how to systematically filter the rows of a Kraken2 report by classification level and write a separate file for each level. The taxon names in the output files are left in the long format (not cleaned up), but the column headers are renamed to match each type of file (Species, Class, etc.).

#Jessica Kaufman
#February 14, 2022
#script to split ALL taxonomic report by clade

# Read in files ----
# Variables ending in 1 hold the Galaxy39 (child) data;
# variables ending in 2 hold the Galaxy41 (adult) data.
# The reports are read as tab-separated text without a header row, so
# read.table() names the columns V1 (read as character) and V2 (read as
# numeric).
# quote = "" and comment.char = "" switch off read.table()'s default special
# handling of ', " and #, so taxon names that contain those characters are
# read verbatim instead of being merged across lines or cut short.
allTax1 <- read.table("Galaxy39-[Report__Kraken2_on_data_33].tabular",
                      sep = "\t", fill = TRUE,
                      quote = "", comment.char = "",
                      colClasses = c("character", "numeric"))
allTax2 <- read.table("Galaxy41-[Report__Kraken2_on_data_36].tabular",
                      sep = "\t", fill = TRUE,
                      quote = "", comment.char = "",
                      colClasses = c("character", "numeric"))


# Species split ----
# Rows whose taxon string (column V1) contains "|s" go to sRows*; every other
# row is kept in restTax* so the later rank splits can work on it.
sRows1 <- allTax1[grep("\\|s", allTax1$V1), ]
restTax1 <- allTax1[grep("\\|s", allTax1$V1, invert = TRUE), ]
sRows2 <- allTax2[grep("\\|s", allTax2$V1), ]
restTax2 <- allTax2[grep("\\|s", allTax2$V1, invert = TRUE), ]

# Rename the two columns for the species files, then write them out
# without row names.
colnames(sRows1) <- c("Species", "Count")
colnames(sRows2) <- c("Species", "Count")
write.csv(sRows1, file = "ChildSpecies.csv", row.names = FALSE)
write.csv(sRows2, file = "AdultSpecies.csv", row.names = FALSE)

# Genus split ----
# From the rows still held in restTax*, take those whose taxon string
# contains "|g" as the genus rows; restTax* keeps only the rows without it.
gRows1 <- restTax1[grep("\\|g", restTax1$V1), ]
restTax1 <- restTax1[grep("\\|g", restTax1$V1, invert = TRUE), ]
gRows2 <- restTax2[grep("\\|g", restTax2$V1), ]
restTax2 <- restTax2[grep("\\|g", restTax2$V1, invert = TRUE), ]

# Rename the two columns for the genus files, then write them out
# without row names.
colnames(gRows1) <- c("Genus", "Count")
colnames(gRows2) <- c("Genus", "Count")
write.csv(gRows1, file = "ChildGenus.csv", row.names = FALSE)
write.csv(gRows2, file = "AdultGenus.csv", row.names = FALSE)

# Family split ----
# From the rows still held in restTax*, take those whose taxon string
# contains "|f" as the family rows; restTax* keeps only the rows without it.
fRows1 <- restTax1[grep("\\|f", restTax1$V1), ]
restTax1 <- restTax1[grep("\\|f", restTax1$V1, invert = TRUE), ]
fRows2 <- restTax2[grep("\\|f", restTax2$V1), ]
restTax2 <- restTax2[grep("\\|f", restTax2$V1, invert = TRUE), ]

# Rename the two columns for the family files, then write them out
# without row names.
colnames(fRows1) <- c("Family", "Count")
colnames(fRows2) <- c("Family", "Count")
write.csv(fRows1, file = "ChildFamily.csv", row.names = FALSE)
write.csv(fRows2, file = "AdultFamily.csv", row.names = FALSE)

# Order split ----
# From the rows still held in restTax*, take those whose taxon string
# contains "|o" as the order rows; restTax* keeps only the rows without it.
oRows1 <- restTax1[grep("\\|o", restTax1$V1), ]
restTax1 <- restTax1[grep("\\|o", restTax1$V1, invert = TRUE), ]
oRows2 <- restTax2[grep("\\|o", restTax2$V1), ]
restTax2 <- restTax2[grep("\\|o", restTax2$V1, invert = TRUE), ]

# Rename the two columns for the order files, then write them out
# without row names.
colnames(oRows1) <- c("Order", "Count")
colnames(oRows2) <- c("Order", "Count")
write.csv(oRows1, file = "ChildOrder.csv", row.names = FALSE)
write.csv(oRows2, file = "AdultOrder.csv", row.names = FALSE)

# Class split ----
# From the rows still held in restTax*, take those whose taxon string
# contains "|c" as the class rows; restTax* keeps only the rows without it.
cRows1 <- restTax1[grep("\\|c", restTax1$V1), ]
restTax1 <- restTax1[grep("\\|c", restTax1$V1, invert = TRUE), ]
cRows2 <- restTax2[grep("\\|c", restTax2$V1), ]
restTax2 <- restTax2[grep("\\|c", restTax2$V1, invert = TRUE), ]

# Rename the two columns for the class files, then write them out
# without row names.
colnames(cRows1) <- c("Class", "Count")
colnames(cRows2) <- c("Class", "Count")
write.csv(cRows1, file = "ChildClass.csv", row.names = FALSE)
write.csv(cRows2, file = "AdultClass.csv", row.names = FALSE)

# Phylum split ----
# From the rows still held in restTax*, take those whose taxon string
# contains "|p" as the phylum rows. restTax* is not updated here.
pRows1 <- restTax1[grep("\\|p", restTax1$V1), ]
pRows2 <- restTax2[grep("\\|p", restTax2$V1), ]

# Rename the two columns for the phylum files, then write them out
# without row names.
colnames(pRows1) <- c("Phylum", "Count")
colnames(pRows2) <- c("Phylum", "Count")
write.csv(pRows1, file = "ChildPhylum.csv", row.names = FALSE)
write.csv(pRows2, file = "AdultPhylum.csv", row.names = FALSE)