rm(list = ls())
###############################input data_1
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
dir_path <- "C:\\Users\\xut2\\Desktop\\"
dir_path_name <- dir(dir_path,pattern = ".*.csv",full.names = T)
#dir_path_name
###############################1.1 bath input data
data_1 <- read.csv(grep("2025-09-30-582_predict_merge_all.csv",dir_path_name,value = T),header = T,stringsAsFactors = F)
# dim(data_1) #[1] 582 16
# table(data_1$antagonist)
# head(data_1)
#################################################
data_list <- list()
# head(data_1)
# colnames(data_1)
df1 <- data_1[, c( "Molecules", grep("class_1", colnames(data_1), value = T), "antagonist" )]
head(df1)
## Molecules class_1_NB class_1_NNET class_1_RF class_1_SVM class_1_XGB
## 1 FZYUKBLYECHAGN 1 0 0 0 0
## 2 FNKBVTBXFLSTPB 1 0 1 1 1
## 3 HTSNFXAICLXZMA 1 1 0 1 0
## 4 KKZGFVAZUKHFAC 1 1 1 1 1
## 5 RKXSQHTWNMDLJM 0 1 0 1 1
## 6 GHWJEDJMOVUXEC 1 1 1 1 1
## class_1_GCN antagonist
## 1 0 0
## 2 0 0
## 3 0 0
## 4 1 1
## 5 0 0
## 6 1 1
df2 <- gather(df1, key = key, value = value, -c(1, 8))
# head(df2)
# unique(df2$key)
for (i in 1:6) {
#i = 1
#i = 1
print(i)
tryCatch({
df_one <- df2[df2$key == unique(df2$key)[i], ]
dim(df_one[df_one$antagonist == 1 & df_one$value == 0, ])
#View(data_2)
data_3 <- data.frame(table(df_one$antagonist, df_one$value))
colnames(data_3) <- c("reality", "predict", "Freq")
#head(data_3)
data_3$geneid <- unique(df_one$key)
data_3$label[data_3$reality == 1 & data_3$predict == 1] <- "TP"
data_3$label[data_3$reality == 1 & data_3$predict == 0] <- "FN"
data_3$label[data_3$reality == 0 & data_3$predict == 1] <- "FP"
data_3$label[data_3$reality == 0 & data_3$predict == 0] <- "TN"
#data_list[[i]] <- data_3
result_1 <- data.frame(TP = data_3$Freq[data_3$label == "TP"],
TN = data_3$Freq[data_3$label == "TN"],
FP = data_3$Freq[data_3$label == "FP"],
FN = data_3$Freq[data_3$label == "FN"])
################calculate
#result_1$MCC <- calc_mcc(tp = result_1$TP, tn = result_1$TN, fp = result_1$FP, fn = result_1$FN)
result_1$sensitivity <- result_1$TP/c(result_1$TP + result_1$FN)
result_1$specificity <- result_1$TN/c(result_1$TN + result_1$FP)
result_1$accuracy <- c(result_1$TP + result_1$TN)/
c(result_1$TP + result_1$FN + result_1$TN + result_1$FP)
result_1$PPV <- result_1$TP/c(result_1$TP + result_1$FP)
result_1$NPV <- result_1$TN/c(result_1$TN + result_1$FN)
result_1$BA <- c(result_1$sensitivity + result_1$specificity)/2
result_1$F1_score <- 2*result_1$TP/c(2*result_1$TP + result_1$FP + result_1$FN)
result_1$model <- unique(df_one$key)
data_list[[i]] <- result_1
}, error = function(e) {
cat("ERROR :",conditionMessage(e), "\n")
cat("ERROR :", conditionMessage(e),"---",i,"---",gsub("\\:","-",Sys.time()),file = "error.txt", append = TRUE, "\n")
#cat(message('** ERR at ', Sys.time(), " **"),file = "test_1.txt", append = TRUE)
#print(e)
})
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
df_pre <- do.call("rbind", data_list)
df_pre
## TP TN FP FN sensitivity specificity accuracy PPV NPV
## 1 76 301 114 91 0.4550898 0.7253012 0.6477663 0.4000000 0.7678571
## 2 74 316 99 93 0.4431138 0.7614458 0.6701031 0.4277457 0.7726161
## 3 113 309 106 54 0.6766467 0.7445783 0.7250859 0.5159817 0.8512397
## 4 124 276 139 43 0.7425150 0.6650602 0.6872852 0.4714829 0.8652038
## 5 104 301 114 63 0.6227545 0.7253012 0.6958763 0.4770642 0.8269231
## 6 138 238 177 29 0.8263473 0.5734940 0.6460481 0.4380952 0.8913858
## BA F1_score model
## 1 0.5901955 0.4257703 class_1_NB
## 2 0.6022798 0.4352941 class_1_NNET
## 3 0.7106125 0.5854922 class_1_RF
## 4 0.7037876 0.5767442 class_1_SVM
## 5 0.6740278 0.5402597 class_1_XGB
## 6 0.6999206 0.5726141 class_1_GCN
write.csv(df_pre, paste0(dir_path, Sys.Date(),"-ppv-", "step_1.csv"),row.names = T)