# Query the NCBI LitSense API for sentences mentioning each drug in data.csv
# NOTE(review): rm(list = ls()) wipes every object in the global environment
# of whoever runs or sources this script. It is kept so the script behaves as
# before; starting from a fresh R session is the safer alternative.
rm(list = ls())

# ---- Input data ----
# Folder that holds the drug table (data.csv).
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"

# Full paths of every CSV file below dir_path (sub-folders included).
# The pattern is anchored so that only names ending in ".csv" qualify; the
# earlier ".*csv" matched "csv" anywhere in a file name.
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name
# Output of a previous run:
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\2022-09-19-pubmed_search.csv"
## [2] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\data.csv"

# Select the drug table by its exact file name. Comparing basename() avoids
# the regex pitfalls of grep("data.csv", ...) (unescaped dot, substring hits
# such as "metadata.csv"), and the length check reports a clear error instead
# of a cryptic read.csv() failure when zero or several files qualify.
data_file <- dir_path_name[basename(dir_path_name) == "data.csv"]
if (length(data_file) != 1L) {
  stop(
    "Expected exactly one data.csv under ", dir_path,
    " but found ", length(data_file), ".",
    call. = FALSE
  )
}
data_1 <- read.csv(data_file, header = TRUE, stringsAsFactors = FALSE)

# Quick look at the table. A previous run reported 719 rows and 20 columns:
# DrugBank.ID, source, Name, CAS.Number, Drug.Groups, InChIKey, InChI, SMILES,
# Formula, KEGG.Compound.ID, KEGG.Drug.ID, PubChem.Compound.ID,
# PubChem.Substance.ID, ChEBI.ID, ChEMBL.ID, HET.ID, ChemSpider.ID,
# BindingDB.ID, Phase, count.
dim(data_1)
## [1] 719 20
head(data_1, 2)

# Optional sanity checks (left disabled):
# table(data_1$source)
# sum(is.na(data_1$Name))  # was 0
# data_1$source <- gsub("_drug", "", data_1$source)
# unique(data_1$source)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RJSONIO)
library(httr)
# ---- Query LitSense for the selected drugs ----
# Results are stored by row number of data_1; rows that were not processed,
# returned nothing, or raised an error stay NULL.
data_list <- list()

# Rows of data_1 to query. Only row 600 is queried here; switch to
# seq_len(nrow(data_1)) to process the whole table.
rows_to_query <- 600:600

for (i in rows_to_query) {
  tryCatch({
    print(i)
    # Random pause of up to 3 seconds before each request.
    Sys.sleep(runif(1) * 3)

    data_2 <- data_1[i, ]

    # Search term: drug name and source separated by "+", with inner spaces
    # also turned into "+". Percent-encoding first keeps reserved characters
    # in a name (e.g. "&", "+", "#") from corrupting the query string.
    vec_1 <- URLencode(
      enc2utf8(paste(data_2$Name, data_2$source)),
      reserved = TRUE,
      repeated = TRUE
    )
    vec_1 <- gsub("%20", "+", vec_1, fixed = TRUE)
    vec_query <- paste0(
      "https://www.ncbi.nlm.nih.gov/research/litsense-api/api/?query=",
      vec_1,
      "&rerank=true"
    )
    print(c(vec_1, vec_query))

    # Bound the wait for a response and turn HTTP 4xx/5xx answers into
    # errors so they end up in error.txt instead of being parsed as hits.
    res <- GET(vec_query, timeout(60))
    stop_for_status(res)

    data <- fromJSON(rawToChar(res$content))

    if (length(data) == 0) {
      # Nothing to bind; skip the row instead of failing on the column
      # assignments below.
      message("No LitSense hits for row ", i, " (", vec_1, ")")
    } else {
      # One row per returned hit, one column per field of the hit.
      data_4 <- do.call("rbind", data)
      data_3 <- data.frame(data_4)
      print(dim(data_3)) # a previous run printed 100 6

      data_3$source <- data_2$source
      data_3$Name <- data_2$Name

      # Convert every column to character. lapply() keeps one vector per
      # column whatever the number of rows, unlike sapply().
      data_3[] <- lapply(data_3, as.character)

      # Keep only hits whose text contains the drug name literally;
      # fixed = TRUE stops regex metacharacters in a name from breaking or
      # loosening the match.
      keep <- grepl(data_2$Name, data_3$text, fixed = TRUE)
      data_3 <- data_3[keep, , drop = FALSE]

      data_list[[i]] <- data_3
    }
  }, error = function(e) {
    # Report on the console and append to error.txt in the working directory,
    # then carry on with the next row.
    cat("ERROR :", conditionMessage(e), "\n")
    cat(
      "ERROR :", conditionMessage(e), "---", i, "---",
      format(Sys.time(), "%Y-%m-%d %H-%M-%S"), "\n",
      file = "error.txt", append = TRUE
    )
  })
}
# Console output of a previous run:
## [1] 600
## [1] "Siponimod+COVID19_drug"
## [2] "https://www.ncbi.nlm.nih.gov/research/litsense-api/api/?query=Siponimod+COVID19_drug&rerank=true"
## [1] 100 6
# ---- Combine and export ----
# Stack the per-row result tables into one data frame. NULL entries of
# data_list are ignored by rbind; the result is NULL when nothing was stored.
data_5 <- do.call("rbind", data_list)

if (is.null(data_5) || nrow(data_5) == 0) {
  message("No matching sentences were collected; nothing was written.")
} else {
  # Quick inspection of the combined table.
  print(dim(data_5))
  ## [1] 20 8
  print(data_5$Name[1])
  ## [1] "Siponimod"
  print(data_5$source[1])
  ## [1] "COVID19_drug"
  print(data_5$text[1])
  ## [1] "Siponimod and ozanimod provide alternatives to fingolimod (approved in 2010) for treating relapsing forms of multiple sclerosis by modulating sphingosine-1-phosphate receptor."

  # Export the combined, name-filtered table (data_5). The earlier code wrote
  # data_4, which is only the raw, unfiltered matrix of the last request.
  write.csv(
    data_5,
    paste0(dir_path, Sys.Date(), "-", "pubmed_search.csv"),
    row.names = FALSE,
    na = ""
  )
}
# Reference: LitSense API tutorial,
# https://www.ncbi.nlm.nih.gov/research/litsense/?view=tutorial#tut-api