# Run this script in a fresh R session. The workspace is intentionally not
# wiped with rm(list = ls()): that call deletes the user's objects but does not
# reset loaded packages or options, so it gives no real isolation.
library(ggplot2)

# ---- Input data ----
# Folder that holds the input CSV files; the figure is also written here.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# Full paths of the CSV files in that folder. The pattern is escaped and
# anchored so only names that really end in ".csv" are listed.
dir_path_name <- dir(dir_path, pattern = "\\.csv$", full.names = TRUE)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\2021-11-23-cal_ic50_all.csv"
## [2] "C:\\Users\\liyix\\OneDrive\\Desktop\\data.csv"

# ---- Read the prediction table ----
# Pick the file by exact base name. A regex search for "data.csv" would also
# hit names such as "metadata.csv" and hand several paths to read.csv().
data_file <- dir_path_name[basename(dir_path_name) == "data.csv"]
if (length(data_file) != 1L) {
  stop(
    "Expected exactly one 'data.csv' in ", dir_path,
    " but found ", length(data_file), ".",
    call. = FALSE
  )
}
data_train <- read.csv(data_file, header = TRUE, stringsAsFactors = FALSE)
dim(data_train)
## [1] 1009    5
# Freq is used as a grouping variable (one density curve per level), so it is
# stored as a factor.
data_train$Freq <- factor(data_train$Freq)

# ---- Density plot ----
# One kernel-density curve of probability_1 per Freq level, drawn as lines.
# The plot is bound to a name so that the export below saves exactly this
# object instead of whatever last_plot() happens to hold.
# NOTE(review): `size` is kept for line widths because the installed ggplot2
# version is unknown; newer releases prefer `linewidth` -- confirm before
# switching.
density_plot <- ggplot(data_train) +
  stat_density(
    aes(x = probability_1, color = Freq),
    size = 1, alpha = 0.5, bw = 0.01,
    geom = "line", position = "identity"
  ) +
  # The density estimate is a continuous quantity, so it gets a continuous
  # y scale (the original used scale_y_discrete here).
  scale_y_continuous(expand = c(0.01, 0)) +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(-0.05, 1.05),
    breaks = seq(0, 1, 0.2)
  ) +
  # The legend title is set once, in scale_color_manual() below.
  labs(x = "Probability") +
  # Optional reference line, currently disabled:
  # geom_vline(aes(xintercept = max(AUC)),
  #            color = "blue", linetype = "dashed", size = 1) +
  theme(
    panel.spacing = unit(0.1, "cm"),
    legend.position = "top",
    legend.key = element_rect(colour = NA, fill = NA),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    axis.ticks = element_line(colour = "black", size = 0.5,
                              linetype = "solid"),
    axis.line = element_line(colour = "black", size = 0.5,
                             linetype = "solid"),
    axis.text = element_text(face = "plain", color = "black",
                             family = "sans", size = 14, angle = 0),
    panel.background = element_rect(fill = "white", colour = "white",
                                    size = 0.5, linetype = "solid"),
    panel.grid.major = element_line(size = 1, linetype = "dashed",
                                    colour = "white"),
    axis.title = element_text(color = "black", size = 14, face = "plain",
                              family = "sans")
  ) +
  scale_color_manual(name = "Antiviral drug",
                     values = c("#3b58a7", "#90278e"))
density_plot

# ---- Output ----
# Write the figure as an LZW-compressed TIFF (16 x 12 cm, 300 dpi) into
# dir_path, with today's date as a file-name prefix.
ggsave(
  filename = paste0(Sys.Date(), "-probability_1.tif"),
  plot = density_plot,
  device = "tiff", path = dir_path,
  scale = 1, width = 16, height = 12, units = "cm",
  dpi = 300, limitsize = TRUE, compression = "lzw"
)
# ---- Split by Freq and summarise against the 0.5 threshold ----
# which() keeps only rows where the comparison is TRUE. Plain logical
# subsetting would insert all-NA rows into both subsets if Freq had missing
# values.
data_1 <- data_train[which(data_train$Freq == 1), ]
data_2 <- data_train[which(data_train$Freq != 1), ]
dim(data_1)
## [1] 115   5
# View(data_train)
head(data_1)
##    probability_0 probability_1 Freq   Virus                      Drug
## 11         0.080         0.920    1   HCV-6                sofosbuvir
## 17         0.002         0.998    1 HCV-H77                  elbasvir
## 28         0.054         0.946    1    H3N2 rimantadine hydrochloride
## 47         0.048         0.952    1   HCV-7               glecaprevir
## 48         0.090         0.910    1   HCV-3               eltrombopag
## 76         0.034         0.966    1   HCV-5                 bifendate

# Count how many probabilities fall in [0, 0.5] versus (0.5, 1].
count_by_half <- function(prob) {
  table(cut(prob, breaks = c(0, 0.5, 1), include.lowest = TRUE))
}

# Rows with Freq == 1.
count_by_half(data_1$probability_1)
## 
## [0,0.5] (0.5,1] 
##      13     102
# Rows with Freq != 1.
count_by_half(data_2$probability_1)
## 
## [0,0.5] (0.5,1] 
##     884      10
# Reference for drawing density curves with line-style legend keys in ggplot2:
# https://stackoverflow.com/questions/17506053/making-line-legends-for-geom-density-in-ggplot2-in-r/51934610