This script parses KO lists generated by searching the KEGG database, in which each line holding a KO identifier is followed by a line holding its description:
# Read the raw KEGG search result. Only the first tab-separated field of each
# line is used below; odd-numbered lines supply the KO value and the
# even-numbered line that follows supplies its description.
kegg.s.list <- read.csv("KEGG.hydroxylases.txt", stringsAsFactors = FALSE,
                        sep = "\t", header = FALSE)
rows <- nrow(kegg.s.list)

# A complete record needs two lines. An odd line count means the last line has
# no partner, so it is skipped (with a warning) rather than producing a
# half-filled record.
if (rows %% 2 != 0) {
  warning("Odd number of input lines (", rows,
          "); the trailing unpaired line is ignored.", call. = FALSE)
}

# Row indices of the KO lines (1, 3, 5, ...) and of the description lines
# (2, 4, 6, ...). seq_len() yields an empty index when there are no pairs,
# so the resulting table is then simply empty.
ko.rows <- 2 * seq_len(rows %/% 2) - 1
desc.rows <- ko.rows + 1

# Build the whole two-column table in one step instead of growing it row by
# row inside a loop.
kegg.s.list.table <- data.frame(
  KO = as.character(kegg.s.list[ko.rows, 1]),
  description = as.character(kegg.s.list[desc.rows, 1]),
  stringsAsFactors = FALSE
)

# Save the paired table as CSV and show its first rows.
write.csv(kegg.s.list.table, "KEGG.all.hydroxylases.csv")
head(kegg.s.list.table)
## KO description
## 1 K00472 P4HA; prolyl 4-hydroxylase [EC:1.14.11.2]
## 2 K00476 ASPH; aspartate beta-hydroxylase [EC:1.14.11.16]
## 3 K00477 PHYH; phytanoyl-CoA hydroxylase [EC:1.14.11.18]
## 4 K00480 E1.14.13.1; salicylate hydroxylase [EC:1.14.13.1]
## 5 K00500 phhA, PAH; phenylalanine-4-hydroxylase [EC:1.14.16.1]
## 6 K00517 CYP81F; indol-3-yl-methylglucosinolate hydroxylase [EC:1.14.-.-]