# NOTE(review): clearing the global workspace is kept only so the script
# behaves exactly as before; prefer running it in a fresh R session instead.
rm(list = ls())
library(DBI)

# 1.1 Input data ----
# Folder that is searched for the CAS/ChEMBL mapping table. Note the trailing
# slash: later code pastes names directly onto this string.
dir_path <- "C:/Users/liyix/OneDrive/Desktop/"

# `pattern` is a regular expression: escape the dot and anchor the end so that
# only files whose name ends in "cas_chembl.txt" are picked up.
dir_path_name <- dir(dir_path, pattern = "cas_chembl\\.txt$", full.names = TRUE)

# Fail early with a readable message instead of letting read.table() choke on
# zero or several paths.
if (length(dir_path_name) != 1L) {
  stop(
    "Expected exactly one 'cas_chembl.txt' file in ", dir_path,
    " but found ", length(dir_path_name), ".",
    call. = FALSE
  )
}

# 1 Single input: read the mapping table, keeping strings as character.
object_file <- read.table(dir_path_name, header = TRUE, stringsAsFactors = FALSE)
dim(object_file)
## [1] 7192    3
colnames(object_file)
## [1] "TS_CASRN.v5a" "cid"          "chembl"
#View(object_file)

# Drop duplicated rows, then every row that has an NA in any column.
object_file <- unique(object_file)
object_file <- na.omit(object_file)
dim(object_file)
## [1] 6915    3
###########1.2
# The SQLite file is looked up in the same folder as the input table.
dir_path_1 <- dir_path

# `pattern` is a regular expression, not a shell glob. The previous "*.db"
# could also match names that merely contain "db" somewhere; match the ".db"
# extension explicitly instead.
dir_path_name_1 <- dir(dir_path_1, pattern = "\\.db$", full.names = TRUE)
dir_path_name_1
## [1] "C:/Users/liyix/OneDrive/Desktop/chembl_25.db"

# dbConnect() needs exactly one database path; stop with a clear message if
# the folder holds none or several.
if (length(dir_path_name_1) != 1L) {
  stop(
    "Expected exactly one '.db' file in ", dir_path_1,
    " but found ", length(dir_path_name_1), ".",
    call. = FALSE
  )
}

# Single input: open the SQLite connection (closed again at the end of the script).
con <- dbConnect(RSQLite::SQLite(), dbname = dir_path_name_1)
# get a list of all tables
alltables <- dbListTables(con)
alltables
##  [1] "action_type"                   "activities"                   
##  [3] "activity_properties"           "activity_smid"                
##  [5] "activity_stds_lookup"          "activity_supp"                
##  [7] "activity_supp_map"             "assay_class_map"              
##  [9] "assay_classification"          "assay_parameters"             
## [11] "assay_type"                    "assays"                       
## [13] "atc_classification"            "binding_sites"                
## [15] "bio_component_sequences"       "bioassay_ontology"            
## [17] "biotherapeutic_components"     "biotherapeutics"              
## [19] "cell_dictionary"               "chembl_id_lookup"             
## [21] "component_class"               "component_domains"            
## [23] "component_go"                  "component_sequences"          
## [25] "component_synonyms"            "compound_properties"          
## [27] "compound_records"              "compound_structural_alerts"   
## [29] "compound_structures"           "confidence_score_lookup"      
## [31] "curation_lookup"               "data_validity_lookup"         
## [33] "defined_daily_dose"            "docs"                         
## [35] "domains"                       "drug_indication"              
## [37] "drug_mechanism"                "formulations"                 
## [39] "frac_classification"           "go_classification"            
## [41] "hrac_classification"           "indication_refs"              
## [43] "irac_classification"           "ligand_eff"                   
## [45] "mechanism_refs"                "metabolism"                   
## [47] "metabolism_refs"               "molecule_atc_classification"  
## [49] "molecule_dictionary"           "molecule_frac_classification" 
## [51] "molecule_hierarchy"            "molecule_hrac_classification" 
## [53] "molecule_irac_classification"  "molecule_synonyms"            
## [55] "organism_class"                "patent_use_codes"             
## [57] "predicted_binding_domains"     "product_patents"              
## [59] "products"                      "protein_class_synonyms"       
## [61] "protein_classification"        "protein_family_classification"
## [63] "relationship_type"             "research_companies"           
## [65] "research_stem"                 "site_components"              
## [67] "source"                        "sqlite_stat1"                 
## [69] "structural_alert_sets"         "structural_alerts"            
## [71] "target_components"             "target_dictionary"            
## [73] "target_relations"              "target_type"                  
## [75] "tissue_dictionary"             "usan_stems"                   
## [77] "variant_sequences"             "version"
##2 fetch all 
#2.1
# Rows of `object_file` to query. Only row 5286 is processed, as before; the
# loop variable is no longer overwritten inside the loop, so widening this
# vector (e.g. seq_len(nrow(object_file))) now actually processes more rows.
rows_to_fetch <- 5286

# The statement text is the same for every compound, so build it once. The
# ChEMBL id is bound through the `?` placeholder instead of being pasted into
# the string, which keeps quoting correct for any input value.
# The LIKE '%<id>' semantics of the original query are preserved; an equality
# test would presumably let SQLite use an index on chembl_id -- TODO confirm
# that the ids in the input file are always exact before switching.
# NOTE(review): `docs` is joined through compound_records.doc_id, not through
# activities.doc_id, so each activity row is paired with every document that
# records the compound. Confirm that this is intended.
sql <- paste(
  "SELECT DISTINCT molecule_dictionary.molregno,",
  "molecule_dictionary.pref_name AS chemical_name,",
  "molecule_dictionary.chembl_id,",
  "activities.assay_id, activities.doc_id,",
  "assays.tid, assays.description, assays.assay_type,",
  "target_dictionary.target_type,",
  "target_dictionary.pref_name AS target_dictionary_pref_name,",
  "target_dictionary.organism,",
  "compound_structures.canonical_smiles, compound_records.compound_name,",
  "docs.journal, docs.volume, docs.issue, docs.first_page, docs.last_page,",
  "docs.pubmed_id, docs.doi, docs.title, docs.doc_type, docs.authors,",
  "docs.abstract, docs.patent_id,",
  "component_synonyms.component_id, component_synonyms.component_synonym",
  "FROM molecule_dictionary",
  "JOIN activities ON molecule_dictionary.molregno = activities.molregno",
  "JOIN assays ON activities.assay_id = assays.assay_id",
  "JOIN target_dictionary ON assays.tid = target_dictionary.tid",
  "JOIN compound_structures ON molecule_dictionary.molregno = compound_structures.molregno",
  "JOIN compound_records ON compound_structures.molregno = compound_records.molregno",
  "JOIN docs ON compound_records.doc_id = docs.doc_id",
  "JOIN target_components ON target_dictionary.tid = target_components.tid",
  "JOIN component_synonyms ON target_components.component_id = component_synonyms.component_id",
  "WHERE molecule_dictionary.chembl_id LIKE ?",
  "AND target_dictionary.organism LIKE '%Homo sapiens%'",
  "AND target_dictionary.target_type LIKE '%SINGLE PROTEIN%'",
  sep = " "
)

for (i in rows_to_fetch) {
  print(i)

  chembl_id <- object_file$chembl[i]
  # An index past the end of the table yields NA; skip it instead of querying
  # for the literal text "NA".
  if (is.na(chembl_id)) {
    cat("ERROR :", "no ChEMBL id in row", i, "\n")
    next
  }

  tryCatch({
    # 2.2 Run the query for this compound. `rs` is assigned at top level and
    # is inspected again after the loop.
    rs <- dbGetQuery(con, sql, params = list(paste0("%", chembl_id)))

    # 2.3 Write one CSV per compound, in a folder named after its ChEMBL id.
    if (nrow(rs) != 0) {
      # `dir_path` already ends with "/", so paste0() yields a clean path.
      out_dir <- paste0(dir_path, chembl_id)
      dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

      rs$chembl <- chembl_id
      rs$cas <- object_file$TS_CASRN.v5a[i]

      # file.path() uses a separator that works on every platform, unlike the
      # hard-coded backslash used previously.
      out_put_dir <- file.path(
        out_dir,
        paste0(Sys.Date(), "-", chembl_id, "-chembl_info.csv")
      )
      write.csv(rs, out_put_dir, na = " ", row.names = FALSE)
    }
  }, error = function(e) {
    # Report the failure and carry on with the next compound.
    cat("ERROR :", conditionMessage(e), "\n")
  })
}
## [1] 5286
# 3 Disconnect from the database
dbDisconnect(con)
# Inspect the data frame left behind by the query in the loop above: its
# dimensions, then its first rows. `rs` is assigned inside that loop's
# tryCatch(), so it is presumably missing if the query failed -- verify before
# relying on these two calls.
dim(rs)
## [1] 13528    29
head(rs)
##   molregno                       chemical_name   chembl_id assay_id doc_id tid
## 1    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
## 2    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
## 3    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
## 4    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
## 5    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
## 6    43756 2,3,7,8-TETRACHLORODIBENZO-P-DIOXIN CHEMBL30327   774665  59306   3
##                                                                         description
## 1 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
## 2 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
## 3 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
## 4 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
## 5 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
## 6 DRUGMATRIX: Phosphodiesterase PDE5 enzyme inhibition (substrate: [3H]cGMP + cGMP)
##   assay_type    target_type target_dictionary_pref_name     organism
## 1          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
## 2          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
## 3          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
## 4          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
## 5          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
## 6          B SINGLE PROTEIN        Phosphodiesterase 5A Homo sapiens
##                   canonical_smiles                           compound_name
## 1 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
## 2 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
## 3 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
## 4 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
## 5 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
## 6 Clc1cc2Oc3cc(Cl)c(Cl)cc3Oc2cc1Cl 2,3,7,8-Tetrachloro-dibenzo[1,4]dioxine
##                    journal volume issue first_page last_page pubmed_id
## 1 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
## 2 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
## 3 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
## 4 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
## 5 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
## 6 Bioorg. Med. Chem. Lett.     14     1        137       141  14684315
##                          doi
## 1 10.1016/j.bmcl.2003.10.002
## 2 10.1016/j.bmcl.2003.10.002
## 3 10.1016/j.bmcl.2003.10.002
## 4 10.1016/j.bmcl.2003.10.002
## 5 10.1016/j.bmcl.2003.10.002
## 6 10.1016/j.bmcl.2003.10.002
##                                                                                                           title
## 1 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
## 2 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
## 3 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
## 4 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
## 5 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
## 6 An electrochemical device for the assay of the interaction between a dioxin receptor and its various ligands.
##      doc_type                                                      authors
## 1 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
## 2 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
## 3 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
## 4 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
## 5 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
## 6 PUBLICATION Murata M, Gonda H, Yano K, Kuroki S, Suzutani T, Katayama Y.
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      abstract
## 1 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
## 2 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
## 3 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
## 4 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
## 5 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
## 6 Aryl hydrocarbon receptor (AhR) is a ligand-activated transcription factor that mediates the toxic and biological effects of a variety of chemicals. Although a significant amount of information is available with respect to the planar aromatic hydrocarbon AhR ligands, information on the actual spectrum of chemical structures that can bind to and activate the AhR is insufficient. In order to determine the binding affinities of chemicals to the human AhR (hAhR), we constructed an electrochemical system which carries the hAhR ligand-binding domain on the electrode surface. The recombinant hAhR ligand-binding domain that was expressed in Escherichia coli using a T7 expression system was immobilized on a gold electrode. The specificity of this biosensor based on a ligand-receptor interaction was comparable to other in vitro screening methods. The receptor-modified electrode can rapidly detect the binding of ligands to hAhR. The electrochemical measurement can be carried out within just 5 min. This electrochemical screening system is rapid, low in cost, and adaptable to high-throughput applications without sacrificing either sensitivity or selectivity.
##   patent_id component_id                            component_synonym
## 1      <NA>          124                                     3.1.4.35
## 2      <NA>          124                                      CGB-PDE
## 3      <NA>          124                                         PDE5
## 4      <NA>          124                                        PDE5A
## 5      <NA>          124 cGMP-binding cGMP-specific phosphodiesterase
## 6      <NA>          124 cGMP-specific 3',5'-cyclic phosphodiesterase
##        chembl       cas
## 1 CHEMBL30327 1746-01-6
## 2 CHEMBL30327 1746-01-6
## 3 CHEMBL30327 1746-01-6
## 4 CHEMBL30327 1746-01-6
## 5 CHEMBL30327 1746-01-6
## 6 CHEMBL30327 1746-01-6