# litsense_ncbi.R

#api
# ---- Input data ----
# Run this script in a fresh R session rather than clearing the workspace with
# rm(list = ls()): that call would delete the global objects of anyone who
# source()s this file.

# Folder that holds the input table and receives the exported search results.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"

# Full paths of every CSV file below dir_path (sub-folders included).
# The pattern is anchored on the extension so that names which merely contain
# "csv" somewhere are not picked up.
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name

## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\2022-09-19-pubmed_search.csv"
## [2] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\data.csv"

#setwd(dir_path)
# Locate the input table by its exact file name. A regex search for "data.csv"
# over the full paths matches any path containing "data", one arbitrary
# character, then "csv", and could hand read.csv() several files at once.
data_file <- dir_path_name[basename(dir_path_name) == "data.csv"]
if (length(data_file) != 1L) {
  stop(
    "Expected exactly one 'data.csv' below dir_path, found ",
    length(data_file),
    call. = FALSE
  )
}

# One row per drug/source pair; the Name and source columns drive the queries.
data_1 <- read.csv(data_file, header = TRUE, stringsAsFactors = FALSE)
dim(data_1) #[1] 719  20

## [1] 719  20

#length(unique(data_1$Accession)) #[1] 20
# Preview the first two rows.
head(data_1, 2)

##   DrugBank.ID           source     Name CAS.Number Drug.Groups InChIKey InChI
## 1             influenza A_drug Tilorone                                      
## 2                     HBV_drug Tilorone                                      
##                                           SMILES Formula KEGG.Compound.ID
## 1 CCN(CC)CCOc1ccc2-c3ccc(OCCN(CC)CC)cc3C(=O)c2c1                         
## 2 CCN(CC)CCOc1ccc2-c3ccc(OCCN(CC)CC)cc3C(=O)c2c1                         
##   KEGG.Drug.ID PubChem.Compound.ID PubChem.Substance.ID ChEBI.ID ChEMBL.ID
## 1                               NA                   NA       NA          
## 2                               NA                   NA       NA          
##   HET.ID ChemSpider.ID BindingDB.ID     Phase count
## 1                   NA           NA approve_2    46
## 2                   NA           NA approve_2    46

# Show the table size again as (rows, columns).
c(nrow(data_1), ncol(data_1))

## [1] 719  20

#table(data_1$source)
#sum(is.na(data_1$Name)) #0
#data_1$source <- gsub("_drug","", data_1$source)
#unique(data_1$source)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(RJSONIO)
library(httr)
# ---- Query the LitSense API ----
# For every selected row of data_1, search LitSense for "<Name> <source>" and
# keep the returned sentences whose text mentions the drug name. The filtered
# hits are stored in data_list at the row's index. A failure for one row is
# printed and appended to error.txt, and the loop carries on with the next row.
data_list <- vector("list", nrow(data_1))

# Rows of data_1 to query. Currently a single test row; switch to
# seq_len(nrow(data_1)) to query the whole table.
rows_to_query <- 600:600

for (i in rows_to_query) {
  tryCatch({
    print(i)
    # Random pause of up to 3 seconds between requests.
    Sys.sleep(runif(1) * 3)
    data_2 <- data_1[i, ]

    # Percent-encode each term so characters such as "&" or "#" inside a name
    # cannot change the structure of the URL. Spaces and the separator between
    # the two terms are sent as "+".
    query_terms <- vapply(
      c(data_2$Name, data_2$source),
      utils::URLencode,
      character(1),
      reserved = TRUE,
      USE.NAMES = FALSE
    )
    vec_1 <- gsub("%20", "+", paste(query_terms, collapse = "+"), fixed = TRUE)
    vec_query <- paste0(
      "https://www.ncbi.nlm.nih.gov/research/litsense-api/api/?query=",
      vec_1,
      "&rerank=true"
    )
    print(c(vec_1, vec_query))

    res <- GET(vec_query)
    # Turn HTTP errors (4xx/5xx) into R errors so they reach the handler below
    # instead of being parsed as if they were results.
    stop_for_status(res)

    # Decode the body explicitly as UTF-8 before parsing the JSON.
    data <- fromJSON(content(res, as = "text", encoding = "UTF-8"))
    if (length(data) == 0L) {
      stop("LitSense returned no hits for this query", call. = FALSE)
    }

    # One row per returned hit, one column per field of the response.
    data_4 <- do.call("rbind", data)
    data_3 <- data.frame(data_4)
    print(dim(data_3)) #[1] 100   6
    data_3$source <- data_2$source
    data_3$Name <- data_2$Name

    # Flatten every column to character. lapply() always returns one element
    # per column, whatever the number of rows.
    data_3[] <- lapply(data_3, as.character)

    # Keep only hits whose text contains the drug name. The match is literal
    # (fixed = TRUE) so names with regex metacharacters such as "(" or "+"
    # are handled correctly.
    data_3 <- data_3[grepl(data_2$Name, data_3$text, fixed = TRUE), ]
    data_list[[i]] <- data_3
  }, error = function(e) {
    cat("ERROR :", conditionMessage(e), "\n")
    # Append the message, row index and a timestamp to the error log.
    cat("ERROR :", conditionMessage(e), "---", i, "---", gsub("\\:", "-", Sys.time()), file = "error.txt", append = TRUE, "\n")
  })
}

## [1] 600
## [1] "Siponimod+COVID19_drug"                                                                          
## [2] "https://www.ncbi.nlm.nih.gov/research/litsense-api/api/?query=Siponimod+COVID19_drug&rerank=true"
## [1] 100   6

# ---- Combine and export ----
# Stack the per-row results into one table. Entries of data_list that were
# never filled are NULL and contribute no rows.
data_5 <- do.call("rbind", data_list)
dim(data_5)

## [1] 20  8

data_5$Name[1]
data_5$source[1]

## [1] "Siponimod"

## [1] "COVID19_drug"

data_5$text[1]

## [1] "Siponimod and ozanimod provide alternatives to fingolimod (approved in 2010) for treating relapsing forms of multiple sclerosis by modulating sphingosine-1-phosphate receptor."

# Disabled idea: additionally require the source term to appear in the text.
#data_4 <- data_4[grep(unique(data_4$source), data_4$text), ]

# Export the combined, name-filtered hits (data_5). data_4 only holds the raw,
# unfiltered response of the last request and must not be written here.
if (is.null(data_5) || nrow(data_5) == 0L) {
  message("No LitSense hits were collected; nothing written.")
} else {
  write.csv(
    data_5,
    paste0(dir_path, Sys.Date(), "-", "pubmed_search.csv"),
    row.names = FALSE,
    na = ""
  )
}
# Reference: LitSense API tutorial, https://www.ncbi.nlm.nih.gov/research/litsense/?view=tutorial#tut-api