# NOTE: the workspace is deliberately no longer wiped with rm(list = ls());
# every object this script reads is assigned before it is used, and clearing
# the global environment would destroy unrelated objects of whoever sources it.
############################### input data
# Folder that is scanned for input CSV files (Windows path, trailing separator
# included so file names can be appended with paste0()).
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# `pattern` is a regular expression, not a shell glob: "*.csv" does not anchor
# the extension, so it is replaced by an explicit "ends with .csv" regex.
dir_path_name <- dir(dir_path, pattern = "\\.csv$")
dir_path_name
## [1] "ECFP4.csv"
# Output folder for the generated files; showWarnings = FALSE keeps re-runs
# quiet when the folder already exists.
dir.create(paste0(dir_path, "ECFP4\\"), showWarnings = FALSE)
############################### batch input data

# Split fingerprint strings into one single-character column per position.
#
# fingerprints: character vector, one fingerprint string per element.
# Returns a data.frame with length(fingerprints) rows and one character column
# per position (default names V1, V2, ...). The width is the longest string in
# the input; shorter strings yield "" in the trailing columns and a warning is
# raised, NA strings yield NA in every column.
split_fingerprint_bits <- function(fingerprints) {
  fingerprints <- as.character(fingerprints)
  widths <- nchar(fingerprints)
  n_bits <- if (length(widths) == 0L || all(is.na(widths))) {
    0L
  } else {
    max(widths, na.rm = TRUE)
  }
  if (length(unique(widths[!is.na(widths)])) > 1L) {
    warning("fingerprint strings differ in length; shorter rows are padded ",
            "with empty strings", call. = FALSE)
  }
  positions <- seq_len(n_bits)
  # One vectorised substring() call: each fingerprint is repeated n_bits times
  # and paired with positions 1..n_bits, then laid out row by row.
  bit_matrix <- matrix(
    substring(rep(fingerprints, each = n_bits), positions, positions),
    nrow = length(fingerprints),
    ncol = n_bits,
    byrow = TRUE
  )
  as.data.frame(bit_matrix, stringsAsFactors = FALSE)
}

# Process every CSV found in dir_path. seq_along() iterates zero times when no
# file was found, and the loop index is no longer overwritten inside the body
# (the former `i = 1` made every iteration re-read the first file).
for (i in seq_along(dir_path_name)) {
  dir_path_file <- paste0(dir_path, dir_path_name[i])
  # Read everything as text so leading zeros of the fingerprint are preserved.
  data_1 <- read.csv(dir_path_file, header = TRUE, stringsAsFactors = FALSE,
                     colClasses = "character")
  # Drop the columns that are not needed downstream (absent ones are ignored).
  data_1 <- data_1[, setdiff(names(data_1), c("Col0", "Col3", "Col4", "Col5")),
                   drop = FALSE]
  print(dim(data_1))
  print(head(data_1, 1))
  # Remember which input file each row came from.
  data_1$source <- dir_path_name[i]
  #View(data_1)
  # Column 3 holds the fingerprint string; expand it into one column per
  # position for all rows at once instead of growing a data frame with rbind().
  df_3 <- split_fingerprint_bits(data_1[[3]])
  print(dim(df_3))
  #View(head(df_3))
  # Row names are now the default 1..n instead of the accidental "df_1",
  # "df_11", ... labels produced by the former row-by-row rbind().
  data_4 <- cbind(data_1, df_3)
  #View(head(data_4,10))
  write.csv(data_4, paste0(dir_path, "ECFP4\\", Sys.Date(), "-", dir_path_name[i]),
            row.names = FALSE)
  # NOTE(review): this file name does not depend on the input file, so with
  # several CSVs the last one processed overwrites the earlier ones.
  write.table(data_4, paste0(dir_path, "ECFP4\\", Sys.Date(), "-", "object_file.txt"),
              sep = "\t")
}
## [1] 87 3
## Col1 Col2
## 1 CYP3A Substrates avanafil
## Circular.fingerprints.for.ecfp4
## 1 0000000000000000000000000000000000000000010000000000000010001000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000010000000100000000000101000001000000000000000000000000100000000000000000001000000000000000001000100000000001000000000000000000000000000000100000000000000000000001000000000000010000000000001000001110000000110000000000100101000000000000100000000000000001100000000000000000000000010000000110100001000000010000000000001010000000000000000000000000000000000000000000000100000000100000000000010000000000100000000000100000000000000000010010000000000000001000000010000000000000000000010000100000000010000000000000000000000000000010010000001100000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000001000000010000000000000100000000100000000010000000000000000000000000000000000000000001010000000000000000000010000000000000000000001000000000000000000000000010010110000000001000
## [1] 87 1024
# Preview the first row of the combined table (auto-printed at top level).
head(data_4, n = 1L)
## Col1 Col2
## df_1 CYP3A Substrates avanafil
## Circular.fingerprints.for.ecfp4
## df_1 0000000000000000000000000000000000000000010000000000000010001000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000010000000100000000000101000001000000000000000000000000100000000000000000001000000000000000001000100000000001000000000000000000000000000000100000000000000000000001000000000000010000000000001000001110000000110000000000100101000000000000100000000000000001100000000000000000000000010000000110100001000000010000000000001010000000000000000000000000000000000000000000000100000000100000000000010000000000100000000000100000000000000000010010000000000000001000000010000000000000000000010000100000000010000000000000000000000000000010010000001100000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000001000000010000000000000100000000100000000010000000000000000000000000000000000000000001010000000000000000000010000000000000000000001000000000000000000000000010010110000000001000
## source V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## df_1 ECFP4.csv 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54
## df_1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72
## df_1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90
## df_1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196
## df_1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241
## df_1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252 V253 V254 V255 V256
## df_1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0
## V257 V258 V259 V260 V261 V262 V263 V264 V265 V266 V267 V268 V269 V270 V271
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V272 V273 V274 V275 V276 V277 V278 V279 V280 V281 V282 V283 V284 V285 V286
## df_1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## V287 V288 V289 V290 V291 V292 V293 V294 V295 V296 V297 V298 V299 V300 V301
## df_1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## V302 V303 V304 V305 V306 V307 V308 V309 V310 V311 V312 V313 V314 V315 V316
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## V317 V318 V319 V320 V321 V322 V323 V324 V325 V326 V327 V328 V329 V330 V331
## df_1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
## V332 V333 V334 V335 V336 V337 V338 V339 V340 V341 V342 V343 V344 V345 V346
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V347 V348 V349 V350 V351 V352 V353 V354 V355 V356 V357 V358 V359 V360 V361
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V362 V363 V364 V365 V366 V367 V368 V369 V370 V371 V372 V373 V374 V375 V376
## df_1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V377 V378 V379 V380 V381 V382 V383 V384 V385 V386 V387 V388 V389 V390 V391
## df_1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## V392 V393 V394 V395 V396 V397 V398 V399 V400 V401 V402 V403 V404 V405 V406
## df_1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## V407 V408 V409 V410 V411 V412 V413 V414 V415 V416 V417 V418 V419 V420 V421
## df_1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0
## V422 V423 V424 V425 V426 V427 V428 V429 V430 V431 V432 V433 V434 V435 V436
## df_1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
## V437 V438 V439 V440 V441 V442 V443 V444 V445 V446 V447 V448 V449 V450 V451
## df_1 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0
## V452 V453 V454 V455 V456 V457 V458 V459 V460 V461 V462 V463 V464 V465 V466
## df_1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## V467 V468 V469 V470 V471 V472 V473 V474 V475 V476 V477 V478 V479 V480 V481
## df_1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0
## V482 V483 V484 V485 V486 V487 V488 V489 V490 V491 V492 V493 V494 V495 V496
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V497 V498 V499 V500 V501 V502 V503 V504 V505 V506 V507 V508 V509 V510 V511
## df_1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0
## V512 V513 V514 V515 V516 V517 V518 V519 V520 V521 V522 V523 V524 V525 V526
## df_1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0
## V527 V528 V529 V530 V531 V532 V533 V534 V535 V536 V537 V538 V539 V540 V541
## df_1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0
## V542 V543 V544 V545 V546 V547 V548 V549 V550 V551 V552 V553 V554 V555 V556
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V557 V558 V559 V560 V561 V562 V563 V564 V565 V566 V567 V568 V569 V570 V571
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V572 V573 V574 V575 V576 V577 V578 V579 V580 V581 V582 V583 V584 V585 V586
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V587 V588 V589 V590 V591 V592 V593 V594 V595 V596 V597 V598 V599 V600 V601
## df_1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## V602 V603 V604 V605 V606 V607 V608 V609 V610 V611 V612 V613 V614 V615 V616
## df_1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## V617 V618 V619 V620 V621 V622 V623 V624 V625 V626 V627 V628 V629 V630 V631
## df_1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## V632 V633 V634 V635 V636 V637 V638 V639 V640 V641 V642 V643 V644 V645 V646
## df_1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V647 V648 V649 V650 V651 V652 V653 V654 V655 V656 V657 V658 V659 V660 V661
## df_1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0
## V662 V663 V664 V665 V666 V667 V668 V669 V670 V671 V672 V673 V674 V675 V676
## df_1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## V677 V678 V679 V680 V681 V682 V683 V684 V685 V686 V687 V688 V689 V690 V691
## df_1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## V692 V693 V694 V695 V696 V697 V698 V699 V700 V701 V702 V703 V704 V705 V706
## df_1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0
## V707 V708 V709 V710 V711 V712 V713 V714 V715 V716 V717 V718 V719 V720 V721
## df_1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## V722 V723 V724 V725 V726 V727 V728 V729 V730 V731 V732 V733 V734 V735 V736
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V737 V738 V739 V740 V741 V742 V743 V744 V745 V746 V747 V748 V749 V750 V751
## df_1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0
## V752 V753 V754 V755 V756 V757 V758 V759 V760 V761 V762 V763 V764 V765 V766
## df_1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0
## V767 V768 V769 V770 V771 V772 V773 V774 V775 V776 V777 V778 V779 V780 V781
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V782 V783 V784 V785 V786 V787 V788 V789 V790 V791 V792 V793 V794 V795 V796
## df_1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## V797 V798 V799 V800 V801 V802 V803 V804 V805 V806 V807 V808 V809 V810 V811
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V812 V813 V814 V815 V816 V817 V818 V819 V820 V821 V822 V823 V824 V825 V826
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V827 V828 V829 V830 V831 V832 V833 V834 V835 V836 V837 V838 V839 V840 V841
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V842 V843 V844 V845 V846 V847 V848 V849 V850 V851 V852 V853 V854 V855 V856
## df_1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## V857 V858 V859 V860 V861 V862 V863 V864 V865 V866 V867 V868 V869 V870 V871
## df_1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## V872 V873 V874 V875 V876 V877 V878 V879 V880 V881 V882 V883 V884 V885 V886
## df_1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## V887 V888 V889 V890 V891 V892 V893 V894 V895 V896 V897 V898 V899 V900 V901
## df_1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## V902 V903 V904 V905 V906 V907 V908 V909 V910 V911 V912 V913 V914 V915 V916
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V917 V918 V919 V920 V921 V922 V923 V924 V925 V926 V927 V928 V929 V930 V931
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V932 V933 V934 V935 V936 V937 V938 V939 V940 V941 V942 V943 V944 V945 V946
## df_1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
## V947 V948 V949 V950 V951 V952 V953 V954 V955 V956 V957 V958 V959 V960 V961
## df_1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## V962 V963 V964 V965 V966 V967 V968 V969 V970 V971 V972 V973 V974 V975 V976
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V977 V978 V979 V980 V981 V982 V983 V984 V985 V986 V987 V988 V989 V990 V991
## df_1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## V992 V993 V994 V995 V996 V997 V998 V999 V1000 V1001 V1002 V1003 V1004
## df_1 0 0 0 0 0 0 0 0 0 0 0 0 0
## V1005 V1006 V1007 V1008 V1009 V1010 V1011 V1012 V1013 V1014 V1015 V1016
## df_1 1 0 0 1 0 1 1 0 0 0 0 0
## V1017 V1018 V1019 V1020 V1021 V1022 V1023 V1024
## df_1 0 0 0 0 1 0 0 0
#View(data_4)
# Build the modelling table: every column from position 5 to the last one,
# followed by column 2 of data_4 as the final column.
model_columns <- c(5:ncol(data_4), 2)
data_5 <- data_4[, model_columns]
#View(head(data_5))
#table(data_5$cardio_call)
# Write the table into the ECFP4 output folder, prefixed with today's date.
model_file <- paste0(dir_path, "ECFP4\\", Sys.Date(), "-", "data_for_model.csv")
write.csv(data_5, file = model_file, row.names = FALSE)