rm(list = ls()) 
#1.1 input data
######data input
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\"
dir_path_name <- dir(dir_path,pattern = "*.csv",full.names = T)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\2020-01-14-max-auc_each_feature_selction.csv"       
## [2] "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\2020-01-16-max-auc_ba_mcc_each_feature_selction.csv"
data_1 <- read.csv(grep("2020-01-16-max-auc_ba_mcc_each_feature_selction.csv",dir_path_name,value = T),
                   header = T, stringsAsFactors = F)
#View(data_1)
unique(data_1$in_vivo)
## [1] "carcinogenicity"        "cardiotoxicity"         "developmental toxicity"
## [4] "hepatotoxicity"         "nephrotoxicity"         "neurotoxicity"         
## [7] "reproductive toxicity"  "skin irritation"        "skin sensitization"
data_1$in_vivo <- gsub("carcinogenicity","Carcinogenicity",data_1$in_vivo)
data_1$in_vivo <- gsub("cardiotoxicity","Cardiotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("developmental toxicity","Developmental toxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("hepatotoxicity" ,"Hepatotoxicity" ,data_1$in_vivo)
data_1$in_vivo <- gsub("nephrotoxicity","Nephrotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("neurotoxicity","Neurotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("reproductive toxicity","Reproductive toxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("skin irritation","Skin irritation",data_1$in_vivo)
data_1$in_vivo <- gsub("skin sensitization","Skin sensitization",data_1$in_vivo)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
#View(data_1)
data_2 <- gather(data_1, key = "category", value = "value",-1,-2)
table(data_2$feature_selection)
## 
##             AUC-ROC          chi square Fisher's exact test                  RF 
##                  27                  27                  27                  27 
##             XGboost 
##                  27
data_2 <- data_2[which(data_2$feature_selection != "chi square"),]
head(data_2)
##                  in_vivo feature_selection category     value
## 1        Carcinogenicity           AUC-ROC      AUC 0.7157781
## 2         Cardiotoxicity           AUC-ROC      AUC 0.7604545
## 3 Developmental toxicity           AUC-ROC      AUC 0.7732337
## 4         Hepatotoxicity           AUC-ROC      AUC 0.7647261
## 5         Nephrotoxicity           AUC-ROC      AUC 0.7346660
## 6          Neurotoxicity           AUC-ROC      AUC 0.7815116
data_2$label <- paste(data_2$in_vivo,data_2$feature_selection,data_2$category,sep = "-")
dim(data_2) #[1] 108   4
## [1] 108   5
unique(data_2$category)
## [1] "AUC"   "B_ACC" "MCC"
head(data_2)
##                  in_vivo feature_selection category     value
## 1        Carcinogenicity           AUC-ROC      AUC 0.7157781
## 2         Cardiotoxicity           AUC-ROC      AUC 0.7604545
## 3 Developmental toxicity           AUC-ROC      AUC 0.7732337
## 4         Hepatotoxicity           AUC-ROC      AUC 0.7647261
## 5         Nephrotoxicity           AUC-ROC      AUC 0.7346660
## 6          Neurotoxicity           AUC-ROC      AUC 0.7815116
##                                label
## 1        Carcinogenicity-AUC-ROC-AUC
## 2         Cardiotoxicity-AUC-ROC-AUC
## 3 Developmental toxicity-AUC-ROC-AUC
## 4         Hepatotoxicity-AUC-ROC-AUC
## 5         Nephrotoxicity-AUC-ROC-AUC
## 6          Neurotoxicity-AUC-ROC-AUC
##1.2 same plot
d1 = data.frame(x1=c(1,1,1), x2=c(2,2,2), y1=c(1,2,3), y2=c(2,3,4), category=c("MCC",'B_ACC','AUC'))
d1[4:12,1] <- d1$x1[1] 
d1[4:12,2] <- d1$x2[1]
d1[4:12,3] <- 4:12 
d1[4:12,4] <- 5:13
d1[4:12,5] <- rep(c("MCC",'B_ACC','AUC'),times = 3)
data_list <- list()
for (i in 1:8) {
  d2 <- d1
  d2$x1 <- d2$x1+i
  d2$x2 <- d2$x2+i
  data_list[[i]] <- d2
}
d3 <- do.call('rbind',data_list)
d4 <- rbind(d1,d3)
dim(d4) #[1] 108   5
## [1] 108   5
head(d4)
##   x1 x2 y1 y2 category
## 1  1  2  1  2      MCC
## 2  1  2  2  3    B_ACC
## 3  1  2  3  4      AUC
## 4  1  2  4  5      MCC
## 5  1  2  5  6    B_ACC
## 6  1  2  6  7      AUC
unique(d4$x1)
## [1] 1 2 3 4 5 6 7 8 9
unique(d4$y1)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12
unique(data_2$feature_selection)
## [1] "AUC-ROC"             "Fisher's exact test" "RF"                 
## [4] "XGboost"
d4$in_vivo <- "aa"
for (i in 1:length(unique(data_2$in_vivo))) {
  d4$in_vivo[d4$x1 == i] <-  unique(data_2$in_vivo)[i]
}
colnames(data_2)
## [1] "in_vivo"           "feature_selection" "category"         
## [4] "value"             "label"
d4$feature_selection <- "aaa"
y1_num <- c(1,4,7,10)
for (i in 1:4) {
  d4$feature_selection[d4$y1 >= y1_num[i]] <-  unique(data_2$feature_selection)[i]
}
ggplot() + 
  scale_x_continuous(name="x") + 
  scale_y_continuous(name="y") +
  geom_rect(data=d4, mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = feature_selection), 
            color="black")

##merge data
colnames(d4);colnames(data_2)
## [1] "x1"                "x2"                "y1"               
## [4] "y2"                "category"          "in_vivo"          
## [7] "feature_selection"
## [1] "in_vivo"           "feature_selection" "category"         
## [4] "value"             "label"
data_3 <- merge(d4,data_2, by = c("feature_selection","category","in_vivo"))
library(ggplot2)
##########plot
#install.packages("ggnewscale")
library(ggnewscale)
red  <- "#AB221F"
blue <- "#3878C1"
green <- "green"
nake <- "#FFFADD"
unique(data_3$in_vivo)
## [1] "Carcinogenicity"        "Cardiotoxicity"         "Developmental toxicity"
## [4] "Hepatotoxicity"         "Nephrotoxicity"         "Neurotoxicity"         
## [7] "Reproductive toxicity"  "Skin irritation"        "Skin sensitization"
#head(auc_data)
auc_data <- data_3[which(data_3$category == "AUC"),]
colnames(auc_data) <- gsub("value","AUC-ROC",colnames(auc_data))
ba_data <- data_3[which(data_3$category == "B_ACC"),]
colnames(ba_data) <- gsub("value","BA",colnames(ba_data))
mcc_data <- data_3[which(data_3$category == "MCC"),]
colnames(mcc_data) <- gsub("value","MCC",colnames(mcc_data ))
str(auc_data)
## 'data.frame':    36 obs. of  9 variables:
##  $ feature_selection: chr  "AUC-ROC" "AUC-ROC" "AUC-ROC" "AUC-ROC" ...
##  $ category         : chr  "AUC" "AUC" "AUC" "AUC" ...
##  $ in_vivo          : chr  "Carcinogenicity" "Cardiotoxicity" "Developmental toxicity" "Hepatotoxicity" ...
##  $ x1               : num  1 2 3 4 5 6 7 8 9 1 ...
##  $ x2               : num  2 3 4 5 6 7 8 9 10 2 ...
##  $ y1               : num  3 3 3 3 3 3 3 3 3 6 ...
##  $ y2               : num  4 4 4 4 4 4 4 4 4 7 ...
##  $ AUC-ROC          : num  0.716 0.76 0.773 0.765 0.735 ...
##  $ label            : chr  "Carcinogenicity-AUC-ROC-AUC" "Cardiotoxicity-AUC-ROC-AUC" "Developmental toxicity-AUC-ROC-AUC" "Hepatotoxicity-AUC-ROC-AUC" ...
min(auc_data$`AUC-ROC`) #[1] 0.671383
## [1] 0.671383
max(auc_data$`AUC-ROC`) #[1] 0.9323617
## [1] 0.9323617
max(mcc_data$MCC) #[1] 0.7444981
## [1] 0.7444981
min(mcc_data$MCC) #[1] 0.3124011
## [1] 0.3124011
min(ba_data$BA) #[1] 0.6368086
## [1] 0.6368086
max(ba_data$BA) #[1] 0.9120028
## [1] 0.9120028
#######
library(cowplot)
#??get_legend
p1 <- ggplot() +
  ##1.1  draw up plot_AUC
  geom_rect(data=auc_data, 
            mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = `AUC-ROC`), 
            color="black") +
  ## 上三角用表达值来配色
  #scale_fill_gradientn(colours = c(nake,red)) +
  scale_fill_continuous(name = "AUC-ROC",low = nake,  high = red,
                        limits=c(0.6,1),breaks = seq(0.6,1,0.2))+
  ## 神技能,清空aes
  new_scale_fill() +
  ##1.2  draw middle plot_BA
  geom_rect(data=ba_data, 
            mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = BA), 
            color="black") +
  ## 下三角用表达值来配色
  #scale_fill_gradientn(colours = c(nake,blue))+
  scale_fill_continuous(name = "BA",low = nake,  high = blue,
                        limits=c(0.6,1),breaks = seq(0.6,1,0.2)) +
  ## 神技能,清空aes
  new_scale_fill() +
  ##1.3  draw middle plot_MCC
  geom_rect(data=mcc_data, 
            mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill =MCC), 
            color="black") +
  ## 下三角用表达值来配色
  #scale_fill_gradientn(colours = c(nake,green)) +
  scale_fill_continuous(name = "MCC",low = nake,  high = green,
                        limits=c(0.3,0.9),breaks = seq(0.3,0.9,0.2)) +
  facet_grid(feature_selection ~ ., scales = "free",switch ="y")
p1

p2 <- p1 +labs(x="", y="Feature selection method", title="")+
  scale_x_continuous(expand = c(0,0),limits = c(0.99,10.1),breaks = seq(1.5,10,1),
                     labels = NULL,
                     sec.axis = sec_axis(trans = ~., breaks = seq(1.5,10,1),
                                         labels = unique(data_3$in_vivo))) +
  scale_y_continuous(expand = c(0,0))+
  theme(plot.margin = margin(0, 3.3, 0,0, "cm"),
        legend.position= "none",
        legend.direction= "horizontal",
        legend.title = element_text(colour="black", size=13, 
                                    face="plain",family="sans",vjust = 0.9),
        legend.text = element_text(colour="black", size=13, 
                                   face="plain",family="sans"),
        legend.background = element_rect(fill="white",
                                         size=0.1, linetype=NULL, 
                                         colour ="white"),
        #legend.key.size = unit(1, "cm"),
        legend.key.width = unit(0.7, "cm"),
        legend.key.height=unit(0.4, "cm"),
        plot.background = element_rect(fill = "white"),
        panel.background = element_rect(fill = "white",
                                        colour = "white",
                                        size = 0.5, linetype = "solid"),
        panel.grid = element_line(size = 0.5, linetype = 'solid',
                                  colour = "white"),
        axis.title.x = element_text(color="black", size=18, face="plain",family="sans"),
        axis.title.y = element_text(color="black", size=20, face="bold",family="sans",angle = 90),
        axis.ticks = element_blank(),
        axis.text.x.top = element_text(color="black", size=18, face="plain",family="sans",
                                       angle = 45,vjust = 0.005,hjust = 0),
        axis.text.y =element_blank(),
        strip.text = element_text(size=16, color="black",
                                  face="plain",family="sans"),
        strip.background = element_rect(colour="white", fill="white", 
                                        size=0.25, linetype="solid")) 
p2 
#############legend
colnames(auc_data)
## [1] "feature_selection" "category"          "in_vivo"          
## [4] "x1"                "x2"                "y1"               
## [7] "y2"                "AUC-ROC"           "label"
legend_auc <- get_legend(ggplot() +
                           ##1.1  draw up plot_AUC
                           geom_tile(data=auc_data, 
                                     mapping=aes(x= in_vivo,y = feature_selection,fill = `AUC-ROC`), 
                                     color="black") +
                           theme(legend.position ="right",
                                 legend.box = "horizontal",
                                 legend.title = element_text(colour="black", size=18, 
                                                             face="plain",family="sans",vjust = 0.9),
                                 legend.text = element_text(colour="black", size=18, 
                                                            face="plain",family="sans"),
                                 legend.background = element_rect(fill="white",
                                                                  size=0.1, linetype=NULL, 
                                                                  colour ="white"),
                                 #legend.key.size = unit(1, "cm"),
                                 legend.key.width = unit(0.5, "cm"),
                                 legend.key.height=unit(0.6, "cm"))+
                           ## 上三角用表达值来配色
                           #scale_fill_gradientn(colours = c(nake,red)) +
                           scale_fill_continuous(name = "AUC-ROC",low = nake,  high = red,
                                                 limits=c(0.6,1),breaks = seq(0.6,1,0.2)))
legend_ba <- get_legend(ggplot() +
                          ##1.1  draw up plot_AUC
                          geom_tile(data=ba_data, 
                                    mapping=aes(x= in_vivo,y = feature_selection,fill = BA), 
                                    color="black") +
                          theme(legend.position ="right",
                                legend.box = "horizontal",
                                legend.title = element_text(colour="black", size=18, 
                                                            face="plain",family="sans",vjust = 0.9),
                                legend.text = element_text(colour="black", size=18, 
                                                           face="plain",family="sans"),
                                legend.background = element_rect(fill="white",
                                                                 size=0.1, linetype=NULL, 
                                                                 colour ="white"),
                                #legend.key.size = unit(1, "cm"),
                                legend.key.width = unit(0.5, "cm"),
                                legend.key.height=unit(0.6, "cm"))+
                          ## 上三角用表达值来配色
                          #scale_fill_gradientn(colours = c(nake,red)) +
                          scale_fill_continuous(name = "BA",low = nake,  high = blue,
                                                limits=c(0.6,1),breaks = seq(0.6,1,0.2)))
legend_mcc <- get_legend(ggplot() +
                           ##1.1  draw up plot_AUC
                           geom_tile(data=mcc_data, 
                                     mapping=aes(x= in_vivo,y = feature_selection,fill = MCC), 
                                     color="black") +
                           theme(legend.position ="right",
                                 legend.box = "horizontal",
                                 legend.title = element_text(colour="black", size=18, 
                                                             face="plain",family="sans",vjust = 0.9),
                                 legend.text = element_text(colour="black", size=18, 
                                                            face="plain",family="sans"),
                                 legend.background = element_rect(fill="white",
                                                                  size=0.1, linetype=NULL, 
                                                                  colour ="white"),
                                 #legend.key.size = unit(1, "cm"),
                                 legend.key.width = unit(0.5, "cm"),
                                 legend.key.height=unit(0.6, "cm"))+
                           ## 上三角用表达值来配色
                           #scale_fill_gradientn(colours = c(nake,red)) +
                           scale_fill_continuous(name = "MCC",low = nake,  high = green,
                                                 limits=c(0.3,0.9), breaks = seq(0.3,0.9,0.3)))

library(grid)
grid.draw(legend_auc) 

#?grid.draw
p4 <- 
  ggdraw() +
  draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(legend_auc, x = 0.89, y = 0.65, width = 0.1, height = 0.1)  +
  draw_plot(legend_ba, x = 0.865, y = 0.45, width = 0.1, height = 0.1) +
  draw_plot(legend_mcc, x = 0.865, y = 0.25, width = 0.1, height = 0.1)

p4

####output
ggsave(filename = paste0(Sys.Date(),"-merge_heatmap_5.tif"), plot = p4, 
       device = "tiff", path = dir_path,
       scale = 1, compression = "lzw",width =26, height = 26, units = "cm",
       dpi = 300, limitsize = TRUE)