# Script setup: working directory, project helpers, and the list of KO ids.
# NOTE(review): the hard-coded setwd() ties this script to one machine; every
# relative path used below ("parsers.R", "KO_list.txt", ...) resolves against
# it. Kept as-is so existing behaviour does not change.
setwd("~/Desktop/test/")

# Project-local helper files. Presumably `.flatFileParser()`, called later in
# this script, is defined in one of them -- TODO confirm.
source("parsers.R")
source("utilities.R")

library(magrittr)

# Read the KO ids from the first column (V1) of the header-less table.
# FALSE is spelled out because `F` is an ordinary variable that other code
# (including the files sourced above) could rebind.
KO_list <- read.table("KO_list.txt", stringsAsFactors = FALSE)$V1

# Show every KO id that was loaded.
KO_list
## [1] "K00002" "K00003" "K00004"
print(1)
## [1] 1

# Work on the first KO id only.
KO <- KO_list[1]

# Read that KO's file from kegg_api/ and join its lines into one
# newline-separated string.
txt <- file.path("kegg_api", KO) %>%
  readLines() %>%
  paste(collapse = "\n")

# txt is a length-one character vector, so head() shows the whole record.
head(txt)
## [1] "ENTRY       K00002                      KO\nNAME        AKR1A1, adh\nDEFINITION  alcohol dehydrogenase (NADP+) [EC:1.1.1.2]\nPATHWAY     ko00010  Glycolysis / Gluconeogenesis\n            ko00040  Pentose and glucuronate interconversions\n            ko00561  Glycerolipid metabolism\n            ko00930  Caprolactam degradation\n            ko01220  Degradation of aromatic compounds\nMODULE      M00014  Glucuronate pathway (uronate pathway)\nBRITE       KEGG Orthology (KO) [BR:ko00001]\n             Metabolism\n              Overview\n               01220 Degradation of aromatic compounds\n                K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n              Carbohydrate metabolism\n               00010 Glycolysis / Gluconeogenesis\n                K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n               00040 Pentose and glucuronate interconversions\n                K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n              Lipid metabolism\n               00561 Glycerolipid metabolism\n                K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n              Xenobiotics biodegradation and metabolism\n               00930 Caprolactam degradation\n                K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n            KEGG modules [BR:ko00002]\n             Pathway module\n              Carbohydrate and lipid metabolism\n               Other carbohydrate metabolism\n                M00014  Glucuronate pathway (uronate pathway)\n                 K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n            Enzymes [BR:ko01000]\n             1. 
Oxidoreductases\n              1.1  Acting on the CH-OH group of donors\n               1.1.1  With NAD+ or NADP+ as acceptor\n                1.1.1.2  alcohol dehydrogenase (NADP+)\n                 K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\n            Exosome [BR:ko04147]\n             Exosomal proteins\n              Exosomal proteins of other body fluids (saliva and urine)\n               K00002  AKR1A1, adh; alcohol dehydrogenase (NADP+)\nDBLINKS     RN: R00746 R01041 R01481 R05231\n            COG: COG0656\n            GO: 0008106\nGENES       HSA: 10327(AKR1A1)\n            PTR: 741418(AKR1A1)\n            PPS: 100990701(AKR1A1)\n            GGO: 101124639(AKR1A1)\n            PON: 100173796(AKR1A1)\n            NLE: 100581229(AKR1A1)\n            MCC: 693380(AKR1A1)\n            MCF: 102143796(AKR1A1)\n            CSAB: 103224909(AKR1A1)\n            RRO: 104662135(AKR1A1)\n            CJC: 100404430(AKR1A1)\n            SBQ: 101054314(AKR1A1)\n            MMU: 58810(Akr1a1)\n            RNO: 78959(Akr1a1)\n            CGE: 100689106(Akr1a1)\n            NGI: 103737782(Akr1a1)\n            HGL: 101710959(Akr1a1)\n            OCU: 100343223 100346892\n            TUP: 102474258(AKR1A1)\n            CFA: 610537(AKR1A1)\n            AML: 100466911(AKR1A1)\n            UMR: 103670772(AKR1A1)\n            FCA: 101091464(AKR1A1)\n            PTG: 102953655(AKR1A1)\n            BTA: 618607(AKR1A1)\n            BOM: 102278148(AKR1A1)\n            PHD: 102329994(AKR1A1)\n            CHX: 102181747(AKR1A1)\n            OAS: 101121637(AKR1A1)\n            SSC: 396924(AKR1A1)\n            CFR: 102508336(AKR1A1)\n            BACU: 103012987(AKR1A1)\n            LVE: 103076421(AKR1A1)\n            ECB: 100052360(AKR1A1)\n            MYB: 102261432(AKR1A1)\n            MYD: 102763234(AKR1A1)\n            PALE: 102889346(AKR1A1)\n            LAV: 100674353\n            MDO: 100024617(AKR1A1)\n            SHR: 100933920(AKR1A1)\n            OAA: 
100076515(AKR1A1)\n            GGA: 424599(AKR1A1)\n            MGP: 100547531(AKR1A1)\n            CJO: 107317439(AKR1A1)\n            APLA: 101804381(AKR1A1)\n            TGU: 100220342(AKR1A1)\n            GFR: 102039134(AKR1A1)\n            FAB: 101814677(AKR1A1)\n            PHI: 102111381(AKR1A1)\n            CCW: 104695240(AKR1A1)\n            FPG: 101914281(AKR1A1)\n            FCH: 102058485(AKR1A1)\n            CLV: 102086095(AKR1A1)\n            AAM: 106485925(AKR1A1)\n            ASN: 102381941(AKR1A1)\n            AMJ: 102560934(AKR1A1)\n            PSS: 102453813(AKR1A1)\n            CMY: 102932885(AKR1A1)\n            ACS: 100553683(akr1a1)\n            PBI: 103067202(AKR1A1)\n            GJA: 107107909(AKR1A1)\n            XLA: 444488(akr1a1.L)\n            XTR: 548891(akr1a1)\n            DRE: 445326(akr1a1a) 799805(akr1a1b)\n            TRU: 101075563 101077149(akr1a1)\n            TNG: GSTEN00028647G001\n            MZE: 101467748 101476039(akr1a1)\n            OLA: 101172969(akr1a1) 101175168\n            XMA: 102234019 102236025(akr1a1)\n            SASA: 106560437(AK1A1) 106580289 106584055(AK1A1)\n            LCM: 102347995 102356304(AKR1A1)\n            CMK: 103176872 103182771(akr1a1)\n            SPU: 587281\n            SKO: 100379040\n            DME: Dmel_CG2767\n            HRO: HELRODRAFT_101948 HELRODRAFT_90848\n            NVE: NEMVE_v1g181888 NEMVE_v1g224560\n            HMG: 100200051\n            TAD: TRIADDRAFT_29280 TRIADDRAFT_50776\n            AQU: 100632875\n            CME: CYME_CMM296C\n            GSL: Gasu_23120\n            SCE: YCR105W(ADH7) YMR318C(ADH6)\n            ERC: Ecym_1093\n            KLA: KLLA0E19889g\n            LTH: KLTH0B04972g KLTH0E07964g\n            VPO: Kpol_1057p24 Kpol_257p4 Kpol_526p18 Kpol_529p30\n            ZRO: ZYRO0G10318g\n            CGR: CAGL0H06853g CAGL0M14047g\n            NCS: NCAS_0C03020(NCAS0C03020) NCAS_0D01220(NCAS0D01220)\n            NDI: NDAI_0A08470(NDAI0A08470) 
NDAI_0G04880(NDAI0G04880) NDAI_0H02940(NDAI0H02940) NDAI_0H02950(NDAI0H02950)\n            TPF: TPHA_0A00140(TPHA0A00140)\n            TBL: TBLA_0A02730(TBLA0A02730) TBLA_0A06640(TBLA0A06640)\n            TDL: TDEL_0E00230(TDEL0E00230) TDEL_0F02300(TDEL0F02300)\n            KAF: KAFR_0A08730(KAFR0A08730) KAFR_0C01550(KAFR0C01550)\n            PPA: PAS_chr1-1_0357 PAS_chr4_0576\n            DHA: DEHA2C04488g DEHA2C04510g DEHA2C17666g\n            PIC: PICST_29079(ADH4) PICST_31312(ADH5) PICST_45137(ADH7)\n            PGU: PGUG_05188 PGUG_05764\n            LEL: LELG_04799\n            CAL: CaO19.12963(ADH7) CaO19.5517(ADH7)\n            CTP: CTRG_02797\n            COT: CORT_0F02920\n            CDU: CD36_62710\n            CLU: CLUG_03198 CLUG_03199 CLUG_03797 CLUG_05090\n            NCR: NCU04823\n            SMP: SMAC_06674\n            PAN: PODANSg2904 PODANSg4584\n            TTT: THITE_2118200\n            MTM: MYCTH_112014\n            MGR: MGG_00220 MGG_06489\n            NHE: NECHADRAFT_37210 NECHADRAFT_68747\n            MAW: MAC_03231 MAC_09408 MAC_09529\n            MAJ: MAA_09669\n            VAL: VDBG_02968\n            SSL: SS1G_00401 SS1G_07135\n            BFU: BC1G_00717 BC1G_10210\n            ANI: AN2860.2 AN3030.2\n            AFM: AFUA_1G09750 AFUA_3G11900\n            AOR: AOR_1_1328154(AO090003000751) AOR_1_780144(AO090023000460)\n            ANG: ANI_1_358144(An16g02510)\n            AFV: AFLA_030660\n            ACT: ACLA_039770\n            NFI: NFIA_065400 NFIA_114710\n            PCS: Pc18g00530\n            CIM: CIMG_07810\n            CPW: CPC735_041920\n            PBL: PAAG_06863\n            URE: UREG_02137\n            ABE: ARB_05690 ARB_06120\n            TVE: TRV_03693 TRV_03831\n            PNO: SNOG_15556\n            PTE: PTT_11113\n            BZE: COCCADRAFT_35593\n            BSC: COCSADRAFT_166068\n            BOR: COCMIDRAFT_4361\n            ZTR: MYCGRDRAFT_31826\n            TML: GSTUM_00001645001\n            SPO: 
SPAC26F1.07\n            CNE: CNC05140\n            CNB: CNBC2030\n            CGI: CGB_C7460C\n            PPL: POSPLDRAFT_106595 POSPLDRAFT_88715\n            CCI: CC1G_02196\n            UMA: UMAG_01861\n            PGR: PGTG_17917\n            MBR: MONBRDRAFT_32881\n            EHI: EHI_023110(81.t00034) EHI_039190(73.t00013) EHI_198760(34.t00053)\n            EDI: EDI_247390 EDI_307670\n            ACAN: ACA1_054920\n            PTI: PHATRDRAFT_41470\n            TPS: THAPSDRAFT_40021\n            PIF: PITG_21743\n            TCR: 504425.60\n            LMA: LMJF_23_0360\n            LIF: LINJ_23_0410\n            TVA: TVAG_344880\n            CBD: CBUD_1937\n            CBG: CbuG_1847\n            ABO: ABO_2414(AKR1A1)\n            ABU: Abu_0783\n            DPR: Despr_0031\n            CCE: Ccel_1009\n            SYN: slr0942\n            SYZ: MYO_118450\n            SYY: SYNGTS_1828(slr0942)\n            SYT: SYNGTI_1828(slr0942)\n            SYS: SYNPCCN_1827(slr0942)\n            SYQ: SYNPCCP_1827(slr0942)\n            SYJ: D082_20560\n            SYG: sync_0822\n            CYT: cce_0782\n            PPN: Palpr_2608\n            SRU: SRU_1267\n            SRM: SRM_01456\n            CMR: Cycma_3529\n            MTT: Ftrac_3685\n            GFO: GFO_1874\n            ZPR: ZPR_1868\n            CAT: CA2559_11288\n            MAC: MA_0403\n            MBA: Mbar_A0771\n            MST: Msp_0967(adh)\n            HMA: pNG7101(adh8)\n            HWA: HQ_1729A\n///"
# Hand the raw flat-file text to the project parser and keep the result.
query <- txt %>% .flatFileParser()

# Inspect what came back: its class first, then its nested structure.
class(query)
## [1] "list"
str(query)
## List of 1
##  $ :List of 8
##   ..$ ENTRY     : Named chr "K00002"
##   .. ..- attr(*, "names")= chr "KO"
##   ..$ NAME      : chr "AKR1A1, adh"
##   ..$ DEFINITION: chr "alcohol dehydrogenase (NADP+) [EC:1.1.1.2]"
##   ..$ PATHWAY   : Named chr [1:5] "Glycolysis / Gluconeogenesis" "Pentose and glucuronate interconversions" "Glycerolipid metabolism" "Caprolactam degradation" ...
##   .. ..- attr(*, "names")= chr [1:5] "ko00010" "ko00040" "ko00561" "ko00930" ...
##   ..$ MODULE    : Named chr "Glucuronate pathway (uronate pathway)"
##   .. ..- attr(*, "names")= chr "M00014"
##   ..$ BRITE     : chr [1:32] "KEGG Orthology (KO) [BR:ko00001]" " Metabolism" "  Overview" "   01220 Degradation of aromatic compounds" ...
##   ..$ DBLINKS   : chr [1:3] "RN: R00746 R01041 R01481 R05231" "COG: COG0656" "GO: 0008106"
##   ..$ GENES     : chr [1:185] "HSA: 10327(AKR1A1)" "PTR: 741418(AKR1A1)" "PPS: 100990701(AKR1A1)" "GGO: 101124639(AKR1A1)" ...
# Pull the fields of interest out of the first parsed record.
# `[[` is used instead of `$` because `$` partially matches list names, so a
# missing field could silently resolve to a different, longer-named one.
query[[1]][["MODULE"]]
##                                  M00014 
## "Glucuronate pathway (uronate pathway)"
name <- query[[1]][["NAME"]]

definition <- query[[1]][["DEFINITION"]]

# PATHWAY is a named character vector: the values are pathway descriptions and
# the names are pathway ids. The magrittr pipe `%>%` passes the value on its
# left as the first argument of the call on its right; here each vector is
# collapsed into a single " | "-separated string.
pathway <- query[[1]][["PATHWAY"]] %>% paste(collapse = " | ")

pathway
## [1] "Glycolysis / Gluconeogenesis | Pentose and glucuronate interconversions | Glycerolipid metabolism | Caprolactam degradation | Degradation of aromatic compounds"
pathway_id <- names(query[[1]][["PATHWAY"]]) %>%
  paste(collapse = " | ")

# Result table: one row per KO id (row names), one column per extracted field.
# No data is supplied, so every cell starts out as NA.
kegg_info <- matrix(
  nrow = length(KO_list),
  ncol = 4,
  dimnames = list(KO_list, c("name", "definition", "pathway_id", "pathway"))
)

# Fill the first row; the value order must match the column names above.
kegg_info[1, ] <- c(name, definition, pathway_id, pathway)

# Write the table tab-separated. col.names = NA emits an empty header cell
# above the row-name column so the header lines up with the data, and
# quote = FALSE leaves strings unquoted. FALSE is spelled out because `F` is a
# rebindable variable rather than a reserved word.
write.table(
  kegg_info,
  "kegg_info.tsv",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)