# NOTE: the former `rm(list = ls())` was removed on purpose. It silently wipes
# every object in the caller's global environment; run this script in a fresh
# R session instead.
############################## input data
# Folder that holds the input CSV; the result file is written here as well.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\dock\\"
# Full paths of everything directly inside dir_path (no recursion).
dir_path_name <- list.files(dir_path, pattern = ".*", full.names = TRUE, recursive = FALSE)
#dir_path_name
# Match the file name literally (fixed = TRUE) so the dots are not treated as
# regex wildcards, and fail early with a clear message unless exactly one
# file is found.
input_csv <- grep("2022-11-16-CYP_3419.csv", dir_path_name, value = TRUE, fixed = TRUE)
if (length(input_csv) != 1L) {
  stop(
    "Expected exactly one file matching '2022-11-16-CYP_3419.csv' in ",
    dir_path, " but found ", length(input_csv), ".",
    call. = FALSE
  )
}
data_1 <- read.csv(input_csv, header = TRUE, stringsAsFactors = FALSE)
#dim(data_1) #[1] 10 2
#head(data_1,2)
# Drop any existing cid column; CIDs are looked up again further down.
data_1$cid <- NULL
# Keep at most the first 10 rows. head() never pads with NA rows when the
# table is shorter, and it keeps a data frame even if only one column is left
# (data_1[1:10, ] would collapse to a plain vector in that case).
data_1 <- head(data_1, 10L)
##################################################################
# webchem provides get_cid(), used below to query PubChem for compound IDs.
library(webchem)
## Warning: package 'webchem' was built under R version 4.2.2
#dim(data_1)
# Show the sample identifiers that are about to be queried.
data_1[["Sample.ID"]]
## [1] "NCGC00013051-10" "NCGC00013058-02" "NCGC00013082-04" "NCGC00013095-10"
## [5] "NCGC00013109-03" "NCGC00013130-04" "NCGC00013225-01" "NCGC00013273-03"
## [9] "NCGC00013287-07" "NCGC00013289-09"
# One row per (query, cid) hit; a single query may return several CIDs
# (see NCGC00013130-04 in the recorded output below).
data_2 <- get_cid(data_1[["Sample.ID"]])
data_2
## # A tibble: 11 × 2
## query cid
## <chr> <chr>
## 1 NCGC00013051-10 16043
## 2 NCGC00013058-02 2724411
## 3 NCGC00013082-04 444212
## 4 NCGC00013095-10 637566
## 5 NCGC00013109-03 4766
## 6 NCGC00013130-04 71166
## 7 NCGC00013130-04 79472
## 8 NCGC00013225-01 51
## 9 NCGC00013273-03 10664
## 10 NCGC00013287-07 8739
## 11 NCGC00013289-09 4904
# Keep only the first CID returned for each query.
data_2 <- data_2[match(unique(data_2$query), data_2$query), ]
dim(data_2) #[1] 10 2
## [1] 10 2
#######################################################
# Rename the first column of the lookup table (the query column) so it
# shares the join key name used by data_1.
names(data_2)[1] <- "Sample.ID"
# Join the sample table with its CIDs on Sample.ID.
data_3 <- merge(x = data_1, y = data_2, by = "Sample.ID")
#dim(data_3) #[1] 3419 3
# Write the result next to the input file, prefixed with today's date;
# missing values are written as empty cells.
write.csv(
  data_3,
  file = paste0(dir_path, Sys.Date(), "-", "CYP_3419_CIS.csv"),
  row.names = FALSE,
  na = ""
)
#######################################################bat file
###############0_Generate an .sdf file from the PubChem CIDs, then split it into individual .mol2 files using Open Babel
###############1_python2_Batch production of .pdbqt files from the .mol2 files (prepare_ligand4.py)
#@echo
#for %%f in (*.mol2) do (
# python prepare_ligand4.py -l %%f -A hydrogens
# timeout 0)
# exit
##############2_Docking of multiple receptors and multiple ligands
#############The folder named "receptor" contains the prepared receptor .pdbqt files and their config files (same base name, e.g., cyp3a4.pdbqt and cyp3a4.txt)
#############The folder named "ligand" contains the prepared ligand .pdbqt files (e.g., 51.pdbqt)
#############vina.exe is also included (the batch file below calls vina)
#@echo
#for %%r in (receptor\*.pdbqt) do (
# for %%l in (ligand\*.pdbqt) do (
# if not exist results mkdir results
# vina --config receptor\%%~nr.txt --receptor %%r --ligand %%l --out results\%%~nr_2_%%~nl.pdbqt --log results\%%~nr_2_%%~nl.txt
# timeout 3))
# exit
##############3_Collect the results of the multi-receptor, multi-ligand docking
##############Combine the best binding energy (mode 1) from each result .txt file into a single txt file
#@echo off
#for %%f in (*.txt) do (
# for /f "skip=4 tokens=1,2 delims= " %%x in (%%f) do (
# if %%x==1 echo %%~nf,%%y >> results.txt))
#exit
#ref https://bioinformaticsreview.com/20200716/prepare-receptor-and-ligand-files-for-docking-using-python-scripts/