# NOTE(review): the working directory is hard-coded to one user's desktop;
# every relative path in this script resolves against it. Kept as-is so the
# script keeps finding its inputs, but consider running from the project
# directory instead.
setwd("~/Desktop/test/")
# Project-local helper scripts. Presumably parsers.R is what defines
# .flatFileParser(), which is called further down -- TODO confirm.
source("parsers.R")
source("utilities.R")
# magrittr supplies the %>% pipe used later in the script.
library(magrittr)
# Read the KO identifiers from the first column (V1) of KO_list.txt as plain
# character strings. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
KO_list <- read.table("KO_list.txt", stringsAsFactors = FALSE)[["V1"]]
KO_list
## [1] "K00002" "K00003" "K00004"
# Prints the literal 1 (looks like a leftover progress/debug marker).
print(1)
## [1] 1
# Work with the first KO identifier only.
KO <- KO_list[1L]
# Read the file kegg_api/<KO> line by line and rejoin the lines into one
# newline-separated string.
txt <- file.path("kegg_api", KO) %>%
  readLines() %>%
  paste(collapse = "\n")
# txt is a single string, so this prints the whole record.
txt %>% head()
## [1] "ENTRY K00002 KO\nNAME AKR1A1, adh\nDEFINITION alcohol dehydrogenase (NADP+) [EC:1.1.1.2]\nPATHWAY ko00010 Glycolysis / Gluconeogenesis\n ko00040 Pentose and glucuronate interconversions\n ko00561 Glycerolipid metabolism\n ko00930 Caprolactam degradation\n ko01220 Degradation of aromatic compounds\nMODULE M00014 Glucuronate pathway (uronate pathway)\nBRITE KEGG Orthology (KO) [BR:ko00001]\n Metabolism\n Overview\n 01220 Degradation of aromatic compounds\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n Carbohydrate metabolism\n 00010 Glycolysis / Gluconeogenesis\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n 00040 Pentose and glucuronate interconversions\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n Lipid metabolism\n 00561 Glycerolipid metabolism\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n Xenobiotics biodegradation and metabolism\n 00930 Caprolactam degradation\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n KEGG modules [BR:ko00002]\n Pathway module\n Carbohydrate and lipid metabolism\n Other carbohydrate metabolism\n M00014 Glucuronate pathway (uronate pathway)\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n Enzymes [BR:ko01000]\n 1. 
Oxidoreductases\n 1.1 Acting on the CH-OH group of donors\n 1.1.1 With NAD+ or NADP+ as acceptor\n 1.1.1.2 alcohol dehydrogenase (NADP+)\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\n Exosome [BR:ko04147]\n Exosomal proteins\n Exosomal proteins of other body fluids (saliva and urine)\n K00002 AKR1A1, adh; alcohol dehydrogenase (NADP+)\nDBLINKS RN: R00746 R01041 R01481 R05231\n COG: COG0656\n GO: 0008106\nGENES HSA: 10327(AKR1A1)\n PTR: 741418(AKR1A1)\n PPS: 100990701(AKR1A1)\n GGO: 101124639(AKR1A1)\n PON: 100173796(AKR1A1)\n NLE: 100581229(AKR1A1)\n MCC: 693380(AKR1A1)\n MCF: 102143796(AKR1A1)\n CSAB: 103224909(AKR1A1)\n RRO: 104662135(AKR1A1)\n CJC: 100404430(AKR1A1)\n SBQ: 101054314(AKR1A1)\n MMU: 58810(Akr1a1)\n RNO: 78959(Akr1a1)\n CGE: 100689106(Akr1a1)\n NGI: 103737782(Akr1a1)\n HGL: 101710959(Akr1a1)\n OCU: 100343223 100346892\n TUP: 102474258(AKR1A1)\n CFA: 610537(AKR1A1)\n AML: 100466911(AKR1A1)\n UMR: 103670772(AKR1A1)\n FCA: 101091464(AKR1A1)\n PTG: 102953655(AKR1A1)\n BTA: 618607(AKR1A1)\n BOM: 102278148(AKR1A1)\n PHD: 102329994(AKR1A1)\n CHX: 102181747(AKR1A1)\n OAS: 101121637(AKR1A1)\n SSC: 396924(AKR1A1)\n CFR: 102508336(AKR1A1)\n BACU: 103012987(AKR1A1)\n LVE: 103076421(AKR1A1)\n ECB: 100052360(AKR1A1)\n MYB: 102261432(AKR1A1)\n MYD: 102763234(AKR1A1)\n PALE: 102889346(AKR1A1)\n LAV: 100674353\n MDO: 100024617(AKR1A1)\n SHR: 100933920(AKR1A1)\n OAA: 100076515(AKR1A1)\n GGA: 424599(AKR1A1)\n MGP: 100547531(AKR1A1)\n CJO: 107317439(AKR1A1)\n APLA: 101804381(AKR1A1)\n TGU: 100220342(AKR1A1)\n GFR: 102039134(AKR1A1)\n FAB: 101814677(AKR1A1)\n PHI: 102111381(AKR1A1)\n CCW: 104695240(AKR1A1)\n FPG: 101914281(AKR1A1)\n FCH: 102058485(AKR1A1)\n CLV: 102086095(AKR1A1)\n AAM: 106485925(AKR1A1)\n ASN: 102381941(AKR1A1)\n AMJ: 102560934(AKR1A1)\n PSS: 102453813(AKR1A1)\n CMY: 102932885(AKR1A1)\n ACS: 100553683(akr1a1)\n PBI: 103067202(AKR1A1)\n GJA: 107107909(AKR1A1)\n XLA: 444488(akr1a1.L)\n XTR: 548891(akr1a1)\n DRE: 445326(akr1a1a) 799805(akr1a1b)\n 
TRU: 101075563 101077149(akr1a1)\n TNG: GSTEN00028647G001\n MZE: 101467748 101476039(akr1a1)\n OLA: 101172969(akr1a1) 101175168\n XMA: 102234019 102236025(akr1a1)\n SASA: 106560437(AK1A1) 106580289 106584055(AK1A1)\n LCM: 102347995 102356304(AKR1A1)\n CMK: 103176872 103182771(akr1a1)\n SPU: 587281\n SKO: 100379040\n DME: Dmel_CG2767\n HRO: HELRODRAFT_101948 HELRODRAFT_90848\n NVE: NEMVE_v1g181888 NEMVE_v1g224560\n HMG: 100200051\n TAD: TRIADDRAFT_29280 TRIADDRAFT_50776\n AQU: 100632875\n CME: CYME_CMM296C\n GSL: Gasu_23120\n SCE: YCR105W(ADH7) YMR318C(ADH6)\n ERC: Ecym_1093\n KLA: KLLA0E19889g\n LTH: KLTH0B04972g KLTH0E07964g\n VPO: Kpol_1057p24 Kpol_257p4 Kpol_526p18 Kpol_529p30\n ZRO: ZYRO0G10318g\n CGR: CAGL0H06853g CAGL0M14047g\n NCS: NCAS_0C03020(NCAS0C03020) NCAS_0D01220(NCAS0D01220)\n NDI: NDAI_0A08470(NDAI0A08470) NDAI_0G04880(NDAI0G04880) NDAI_0H02940(NDAI0H02940) NDAI_0H02950(NDAI0H02950)\n TPF: TPHA_0A00140(TPHA0A00140)\n TBL: TBLA_0A02730(TBLA0A02730) TBLA_0A06640(TBLA0A06640)\n TDL: TDEL_0E00230(TDEL0E00230) TDEL_0F02300(TDEL0F02300)\n KAF: KAFR_0A08730(KAFR0A08730) KAFR_0C01550(KAFR0C01550)\n PPA: PAS_chr1-1_0357 PAS_chr4_0576\n DHA: DEHA2C04488g DEHA2C04510g DEHA2C17666g\n PIC: PICST_29079(ADH4) PICST_31312(ADH5) PICST_45137(ADH7)\n PGU: PGUG_05188 PGUG_05764\n LEL: LELG_04799\n CAL: CaO19.12963(ADH7) CaO19.5517(ADH7)\n CTP: CTRG_02797\n COT: CORT_0F02920\n CDU: CD36_62710\n CLU: CLUG_03198 CLUG_03199 CLUG_03797 CLUG_05090\n NCR: NCU04823\n SMP: SMAC_06674\n PAN: PODANSg2904 PODANSg4584\n TTT: THITE_2118200\n MTM: MYCTH_112014\n MGR: MGG_00220 MGG_06489\n NHE: NECHADRAFT_37210 NECHADRAFT_68747\n MAW: MAC_03231 MAC_09408 MAC_09529\n MAJ: MAA_09669\n VAL: VDBG_02968\n SSL: SS1G_00401 SS1G_07135\n BFU: BC1G_00717 BC1G_10210\n ANI: AN2860.2 AN3030.2\n AFM: AFUA_1G09750 AFUA_3G11900\n AOR: AOR_1_1328154(AO090003000751) AOR_1_780144(AO090023000460)\n ANG: ANI_1_358144(An16g02510)\n AFV: AFLA_030660\n ACT: ACLA_039770\n NFI: NFIA_065400 NFIA_114710\n PCS: 
Pc18g00530\n CIM: CIMG_07810\n CPW: CPC735_041920\n PBL: PAAG_06863\n URE: UREG_02137\n ABE: ARB_05690 ARB_06120\n TVE: TRV_03693 TRV_03831\n PNO: SNOG_15556\n PTE: PTT_11113\n BZE: COCCADRAFT_35593\n BSC: COCSADRAFT_166068\n BOR: COCMIDRAFT_4361\n ZTR: MYCGRDRAFT_31826\n TML: GSTUM_00001645001\n SPO: SPAC26F1.07\n CNE: CNC05140\n CNB: CNBC2030\n CGI: CGB_C7460C\n PPL: POSPLDRAFT_106595 POSPLDRAFT_88715\n CCI: CC1G_02196\n UMA: UMAG_01861\n PGR: PGTG_17917\n MBR: MONBRDRAFT_32881\n EHI: EHI_023110(81.t00034) EHI_039190(73.t00013) EHI_198760(34.t00053)\n EDI: EDI_247390 EDI_307670\n ACAN: ACA1_054920\n PTI: PHATRDRAFT_41470\n TPS: THAPSDRAFT_40021\n PIF: PITG_21743\n TCR: 504425.60\n LMA: LMJF_23_0360\n LIF: LINJ_23_0410\n TVA: TVAG_344880\n CBD: CBUD_1937\n CBG: CbuG_1847\n ABO: ABO_2414(AKR1A1)\n ABU: Abu_0783\n DPR: Despr_0031\n CCE: Ccel_1009\n SYN: slr0942\n SYZ: MYO_118450\n SYY: SYNGTS_1828(slr0942)\n SYT: SYNGTI_1828(slr0942)\n SYS: SYNPCCN_1827(slr0942)\n SYQ: SYNPCCP_1827(slr0942)\n SYJ: D082_20560\n SYG: sync_0822\n CYT: cce_0782\n PPN: Palpr_2608\n SRU: SRU_1267\n SRM: SRM_01456\n CMR: Cycma_3529\n MTT: Ftrac_3685\n GFO: GFO_1874\n ZPR: ZPR_1868\n CAT: CA2559_11288\n MAC: MA_0403\n MBA: Mbar_A0771\n MST: Msp_0967(adh)\n HMA: pNG7101(adh8)\n HWA: HQ_1729A\n///"
# Run the flat-file text through the parser; the result is stored in `query`.
query <- txt %>% .flatFileParser()
# Inspect the container type of the parsed result.
query %>% class()
## [1] "list"
# Dump the nested structure of the parsed result.
query %>% str()
## List of 1
## $ :List of 8
## ..$ ENTRY : Named chr "K00002"
## .. ..- attr(*, "names")= chr "KO"
## ..$ NAME : chr "AKR1A1, adh"
## ..$ DEFINITION: chr "alcohol dehydrogenase (NADP+) [EC:1.1.1.2]"
## ..$ PATHWAY : Named chr [1:5] "Glycolysis / Gluconeogenesis" "Pentose and glucuronate interconversions" "Glycerolipid metabolism" "Caprolactam degradation" ...
## .. ..- attr(*, "names")= chr [1:5] "ko00010" "ko00040" "ko00561" "ko00930" ...
## ..$ MODULE : Named chr "Glucuronate pathway (uronate pathway)"
## .. ..- attr(*, "names")= chr "M00014"
## ..$ BRITE : chr [1:32] "KEGG Orthology (KO) [BR:ko00001]" " Metabolism" " Overview" " 01220 Degradation of aromatic compounds" ...
## ..$ DBLINKS : chr [1:3] "RN: R00746 R01041 R01481 R05231" "COG: COG0656" "GO: 0008106"
## ..$ GENES : chr [1:185] "HSA: 10327(AKR1A1)" "PTR: 741418(AKR1A1)" "PPS: 100990701(AKR1A1)" "GGO: 101124639(AKR1A1)" ...
# Show the MODULE field of the first parsed record. Fields are extracted with
# [[ ]] throughout: it matches the field name exactly, whereas $ on a list
# silently accepts a unique partial match.
query[[1]][["MODULE"]]
## M00014
## "Glucuronate pathway (uronate pathway)"
# First parsed record; all fields below are taken from it.
entry <- query[[1]]
name <- entry[["NAME"]]
definition <- entry[["DEFINITION"]]
# A field that is absent comes back as NULL, and c() drops NULLs, which would
# silently shorten any vector built from these values. Use NA instead.
if (is.null(name)) {
  name <- NA_character_
}
if (is.null(definition)) {
  definition <- NA_character_
}
# PATHWAY is a named character vector (names are pathway ids, values are
# descriptions). The %>% pipe passes the value on its left to the function on
# its right; here each part is collapsed into one " | "-separated string.
pathway <- entry[["PATHWAY"]] %>% paste(collapse = " | ")
pathway
## [1] "Glycolysis / Gluconeogenesis | Pentose and glucuronate interconversions | Glycerolipid metabolism | Caprolactam degradation | Degradation of aromatic compounds"
pathway_id <- names(entry[["PATHWAY"]]) %>% paste(collapse = " | ")
# Pre-allocate a character matrix with one row per KO identifier and one
# column per exported field. Rows that are not filled in below stay NA.
kegg_info <- matrix(
  NA_character_,
  nrow = length(KO_list),
  ncol = 4,
  dimnames = list(KO_list, c("name", "definition", "pathway_id", "pathway"))
)
# Only the first row is filled here, with the fields extracted above.
kegg_info[1, ] <- c(name, definition, pathway_id, pathway)
# Tab-separated output without quoting. col.names = NA makes write.table emit
# an empty header cell above the row-name column so header and data line up.
write.table(
  kegg_info,
  "kegg_info.tsv",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)