# Remove every object from the current workspace before the script runs.
rm(list = ls())
# Packages used to download (RCurl) and parse (XML) the HMDB records;
# install them yourself before running this script.
library(XML)
library(RCurl)
# Example HMDB accession.
id <- "HMDB0000001"
# Fetch the names of the pathways that HMDB lists for one metabolite.
#
# Args:
#   id: HMDB metabolite accession as a character string, e.g. "HMDB0000190"
#       (https://hmdb.ca/metabolites/HMDB0000190).
#
# Returns:
#   A list with one element per child of the record's <pathways> node, each
#   holding the text of that child's first element (the pathway name). An
#   empty list is returned when the record has no <pathways> section.
pathways <- function(id) {
  url <- paste0("https://hmdb.ca/metabolites/", id, ".xml")
  # Network download; how long it takes depends on the connection.
  wp <- getURL(url)
  # `wp` holds the XML text itself, not a file name.
  root <- xmlRoot(xmlParse(wp, asText = TRUE))

  # Pick a child by element name instead of by position, so the lookup keeps
  # working if HMDB adds or removes fields ahead of the one we need.
  child_named <- function(node, tag) {
    if (is.null(node)) {
      return(NULL)
    }
    kids <- xmlChildren(node)
    hit <- which(names(kids) == tag)
    if (length(hit) == 0) {
      return(NULL)
    }
    kids[[hit[1]]]
  }

  pathways_node <- child_named(
    child_named(root, "biological_properties"),
    "pathways"
  )
  if (is.null(pathways_node)) {
    return(list())
  }
  # The first child of each <pathway> entry carries the pathway name.
  lapply(xmlChildren(pathways_node), function(x) xmlValue(x[[1]]))
}
# Example query for HMDB0000001 (1-Methylhistidine); the id must be quoted.
# The result is stored so the record is downloaded only once and then reused.
pathway_names <- pathways("HMDB0000001")
pathway_names
## $pathway
## [1] "Histidine Metabolism"
##
## $pathway
## [1] "Histidinemia"
# Stack the list into a one-column character matrix.
do.call("rbind", pathway_names)
## [,1]
## pathway "Histidine Metabolism"
## pathway "Histidinemia"
# ---- Accession list taken from the hmdbQuery disease table ----
# Install once with: BiocManager::install("hmdbQuery")
library(hmdbQuery)
data(hmdb_disease)
head(hmdb_disease)
## DataFrame with 6 rows and 3 columns
## accession name
## <character> <character>
## 1 HMDB0000001 1-Methylhistidine
## 2 HMDB0000001 1-Methylhistidine
## 3 HMDB0000001 1-Methylhistidine
## 4 HMDB0000001 1-Methylhistidine
## 5 HMDB0000002 1,3-Diaminopropane
## 6 HMDB0000002 1,3-Diaminopropane
## disease
## <character>
## 1 Alzheimer's disease
## 2 Diabetes mellitus type 2
## 3 Kidney disease
## 4 Obesity
## 5 Perillyl alcohol administration for cancer treatment
## 6 Leukemia
# One accession per (metabolite, disease) row, so ids repeat.
data_1 <- hmdb_disease[["accession"]]
length(data_1)
## [1] 75360
# Keep each accession once; data_1 is the id list used further down.
data_1 <- unique(data_1)
length(data_1)
## [1] 74507
# ---- Download one HMDB record and tabulate its pathways and diseases ----
# Per-metabolite tables are collected in these lists, indexed by the
# position of the accession in data_1.
data_list_pathway <- list()
data_list_disease <- list()
# The loop over every accession is disabled; only the first id is processed.
# for (i in seq_along(data_1)) {
i <- 1
id <- data_1[i]
url <- paste0("https://hmdb.ca/metabolites/", id, ".xml")
wp <- getURL(url)
root <- xmlRoot(xmlParse(wp))
list_1 <- xmlToList(root)
print(names(list_1))
## [1] "version" "creation_date"
## [3] "update_date" "accession"
## [5] "status" "secondary_accessions"
## [7] "name" "description"
## [9] "synonyms" "chemical_formula"
## [11] "average_molecular_weight" "monisotopic_molecular_weight"
## [13] "iupac_name" "traditional_iupac"
## [15] "cas_registry_number" "smiles"
## [17] "inchi" "inchikey"
## [19] "taxonomy" "ontology"
## [21] "state" "experimental_properties"
## [23] "predicted_properties" "spectra"
## [25] "biological_properties" "normal_concentrations"
## [27] "abnormal_concentrations" "diseases"
## [29] "kegg_id" "foodb_id"
## [31] "chemspider_id" "drugbank_id"
## [33] "pdb_id" "chebi_id"
## [35] "pubchem_compound_id" "biocyc_id"
## [37] "wikipedia_id" "knapsack_id"
## [39] "phenol_explorer_compound_id" "bigg_id"
## [41] "metlin_id" "vmh_id"
## [43] "fbonto_id" "synthesis_reference"
## [45] "general_references" "protein_associations"
# Fields are selected by name rather than by position in the listing above,
# so a field added to or removed from the record cannot shift the lookup.
# One row per <pathway> entry.
pathway_1 <- do.call(
  "rbind",
  list_1[["biological_properties"]][["pathways"]]
)
pathway_1 <- data.frame(pathway_1)
# rbind() on lists yields list cells; flatten every column to character.
# lapply() always returns one vector per column, whatever the row count.
pathway_1[] <- lapply(pathway_1, as.character)
class(pathway_1)
## [1] "data.frame"
# One row per entry of the record's diseases section.
disease_1 <- do.call("rbind", list_1[["diseases"]])
disease_1 <- data.frame(disease_1)
class(disease_1)
## [1] "data.frame"
disease_1[] <- lapply(disease_1, as.character)
###############
dim(pathway_1)
## [1] 2 3
dim(disease_1)
## [1] 10 3
# View(disease_1)
# View(pathway_1)
# Tag every row with the accession and the metabolite name it came from.
pathway_1$id <- id
disease_1$id <- id
pathway_1$id_name <- list_1[["name"]]
disease_1$id_name <- list_1[["name"]]
data_list_pathway[[i]] <- pathway_1
data_list_disease[[i]] <- disease_1
# }
# Stack the per-metabolite tables into one table per annotation type.
pathway_all <- do.call("rbind", data_list_pathway)
# The disease table must be built from data_list_disease; reading the
# pathway list here would write pathway rows into the disease CSV.
disease_all <- do.call("rbind", data_list_disease)
head(pathway_all)
## name smpdb_id kegg_map_id id
## pathway Histidine Metabolism SMP00044 map00340 HMDB0000001
## pathway.1 Histidinemia SMP00191 NULL HMDB0000001
## id_name
## pathway 1-Methylhistidine
## pathway.1 1-Methylhistidine
head(disease_all)
# Write both tables to date-stamped CSV files in the working directory.
write.csv(
  pathway_all,
  paste0(Sys.Date(), "-", "all_hmdb_pathway.csv"),
  row.names = FALSE
)
write.csv(
  disease_all,
  paste0(Sys.Date(), "-", "all_hmdb_disease.csv"),
  row.names = FALSE
)
# Reference: https://www.bioconductor.org/packages/release/bioc/vignettes/hmdbQuery/inst/doc/hmdbQuery.html