rm(list = ls())
#1.1 input data
######data input
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\"
dir_path_name <- dir(dir_path,pattern = "*.csv",full.names = T)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\2020-01-14-max-auc_each_feature_selction.csv"
## [2] "C:\\Users\\liyix\\OneDrive\\Desktop\\Fig.3_final\\2020-01-16-max-auc_ba_mcc_each_feature_selction.csv"
data_1 <- read.csv(grep("2020-01-16-max-auc_ba_mcc_each_feature_selction.csv",dir_path_name,value = T),
header = T, stringsAsFactors = F)
#View(data_1)
unique(data_1$in_vivo)
## [1] "carcinogenicity" "cardiotoxicity" "developmental toxicity"
## [4] "hepatotoxicity" "nephrotoxicity" "neurotoxicity"
## [7] "reproductive toxicity" "skin irritation" "skin sensitization"
data_1$in_vivo <- gsub("carcinogenicity","Carcinogenicity",data_1$in_vivo)
data_1$in_vivo <- gsub("cardiotoxicity","Cardiotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("developmental toxicity","Developmental toxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("hepatotoxicity" ,"Hepatotoxicity" ,data_1$in_vivo)
data_1$in_vivo <- gsub("nephrotoxicity","Nephrotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("neurotoxicity","Neurotoxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("reproductive toxicity","Reproductive toxicity",data_1$in_vivo)
data_1$in_vivo <- gsub("skin irritation","Skin irritation",data_1$in_vivo)
data_1$in_vivo <- gsub("skin sensitization","Skin sensitization",data_1$in_vivo)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#View(data_1)
data_2 <- gather(data_1, key = "category", value = "value",-1,-2)
table(data_2$feature_selection)
##
## AUC-ROC chi square Fisher's exact test RF
## 27 27 27 27
## XGboost
## 27
data_2 <- data_2[which(data_2$feature_selection != "chi square"),]
head(data_2)
## in_vivo feature_selection category value
## 1 Carcinogenicity AUC-ROC AUC 0.7157781
## 2 Cardiotoxicity AUC-ROC AUC 0.7604545
## 3 Developmental toxicity AUC-ROC AUC 0.7732337
## 4 Hepatotoxicity AUC-ROC AUC 0.7647261
## 5 Nephrotoxicity AUC-ROC AUC 0.7346660
## 6 Neurotoxicity AUC-ROC AUC 0.7815116
data_2$label <- paste(data_2$in_vivo,data_2$feature_selection,data_2$category,sep = "-")
dim(data_2) #[1] 108 4
## [1] 108 5
unique(data_2$category)
## [1] "AUC" "B_ACC" "MCC"
head(data_2)
## in_vivo feature_selection category value
## 1 Carcinogenicity AUC-ROC AUC 0.7157781
## 2 Cardiotoxicity AUC-ROC AUC 0.7604545
## 3 Developmental toxicity AUC-ROC AUC 0.7732337
## 4 Hepatotoxicity AUC-ROC AUC 0.7647261
## 5 Nephrotoxicity AUC-ROC AUC 0.7346660
## 6 Neurotoxicity AUC-ROC AUC 0.7815116
## label
## 1 Carcinogenicity-AUC-ROC-AUC
## 2 Cardiotoxicity-AUC-ROC-AUC
## 3 Developmental toxicity-AUC-ROC-AUC
## 4 Hepatotoxicity-AUC-ROC-AUC
## 5 Nephrotoxicity-AUC-ROC-AUC
## 6 Neurotoxicity-AUC-ROC-AUC
##1.2 same plot
d1 = data.frame(x1=c(1,1,1), x2=c(2,2,2), y1=c(1,2,3), y2=c(2,3,4), category=c("MCC",'B_ACC','AUC'))
d1[4:12,1] <- d1$x1[1]
d1[4:12,2] <- d1$x2[1]
d1[4:12,3] <- 4:12
d1[4:12,4] <- 5:13
d1[4:12,5] <- rep(c("MCC",'B_ACC','AUC'),times = 3)
data_list <- list()
for (i in 1:8) {
d2 <- d1
d2$x1 <- d2$x1+i
d2$x2 <- d2$x2+i
data_list[[i]] <- d2
}
d3 <- do.call('rbind',data_list)
d4 <- rbind(d1,d3)
dim(d4) #[1] 108 5
## [1] 108 5
head(d4)
## x1 x2 y1 y2 category
## 1 1 2 1 2 MCC
## 2 1 2 2 3 B_ACC
## 3 1 2 3 4 AUC
## 4 1 2 4 5 MCC
## 5 1 2 5 6 B_ACC
## 6 1 2 6 7 AUC
unique(d4$x1)
## [1] 1 2 3 4 5 6 7 8 9
unique(d4$y1)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12
unique(data_2$feature_selection)
## [1] "AUC-ROC" "Fisher's exact test" "RF"
## [4] "XGboost"
d4$in_vivo <- "aa"
for (i in 1:length(unique(data_2$in_vivo))) {
d4$in_vivo[d4$x1 == i] <- unique(data_2$in_vivo)[i]
}
colnames(data_2)
## [1] "in_vivo" "feature_selection" "category"
## [4] "value" "label"
d4$feature_selection <- "aaa"
y1_num <- c(1,4,7,10)
for (i in 1:4) {
d4$feature_selection[d4$y1 >= y1_num[i]] <- unique(data_2$feature_selection)[i]
}
ggplot() +
scale_x_continuous(name="x") +
scale_y_continuous(name="y") +
geom_rect(data=d4, mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = feature_selection),
color="black")

##merge data
colnames(d4);colnames(data_2)
## [1] "x1" "x2" "y1"
## [4] "y2" "category" "in_vivo"
## [7] "feature_selection"
## [1] "in_vivo" "feature_selection" "category"
## [4] "value" "label"
data_3 <- merge(d4,data_2, by = c("feature_selection","category","in_vivo"))
library(ggplot2)
##########plot
#install.packages("ggnewscale")
library(ggnewscale)
red <- "#AB221F"
blue <- "#3878C1"
green <- "green"
nake <- "#FFFADD"
unique(data_3$in_vivo)
## [1] "Carcinogenicity" "Cardiotoxicity" "Developmental toxicity"
## [4] "Hepatotoxicity" "Nephrotoxicity" "Neurotoxicity"
## [7] "Reproductive toxicity" "Skin irritation" "Skin sensitization"
#head(auc_data)
auc_data <- data_3[which(data_3$category == "AUC"),]
colnames(auc_data) <- gsub("value","AUC-ROC",colnames(auc_data))
ba_data <- data_3[which(data_3$category == "B_ACC"),]
colnames(ba_data) <- gsub("value","BA",colnames(ba_data))
mcc_data <- data_3[which(data_3$category == "MCC"),]
colnames(mcc_data) <- gsub("value","MCC",colnames(mcc_data ))
str(auc_data)
## 'data.frame': 36 obs. of 9 variables:
## $ feature_selection: chr "AUC-ROC" "AUC-ROC" "AUC-ROC" "AUC-ROC" ...
## $ category : chr "AUC" "AUC" "AUC" "AUC" ...
## $ in_vivo : chr "Carcinogenicity" "Cardiotoxicity" "Developmental toxicity" "Hepatotoxicity" ...
## $ x1 : num 1 2 3 4 5 6 7 8 9 1 ...
## $ x2 : num 2 3 4 5 6 7 8 9 10 2 ...
## $ y1 : num 3 3 3 3 3 3 3 3 3 6 ...
## $ y2 : num 4 4 4 4 4 4 4 4 4 7 ...
## $ AUC-ROC : num 0.716 0.76 0.773 0.765 0.735 ...
## $ label : chr "Carcinogenicity-AUC-ROC-AUC" "Cardiotoxicity-AUC-ROC-AUC" "Developmental toxicity-AUC-ROC-AUC" "Hepatotoxicity-AUC-ROC-AUC" ...
min(auc_data$`AUC-ROC`) #[1] 0.671383
## [1] 0.671383
max(auc_data$`AUC-ROC`) #[1] 0.9323617
## [1] 0.9323617
max(mcc_data$MCC) #[1] 0.7444981
## [1] 0.7444981
min(mcc_data$MCC) #[1] 0.3124011
## [1] 0.3124011
min(ba_data$BA) #[1] 0.6368086
## [1] 0.6368086
max(ba_data$BA) #[1] 0.9120028
## [1] 0.9120028
#######
library(cowplot)
#??get_legend
p1 <- ggplot() +
##1.1 draw up plot_AUC
geom_rect(data=auc_data,
mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = `AUC-ROC`),
color="black") +
## 上三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,red)) +
scale_fill_continuous(name = "AUC-ROC",low = nake, high = red,
limits=c(0.6,1),breaks = seq(0.6,1,0.2))+
## 神技能,清空aes
new_scale_fill() +
##1.2 draw middle plot_BA
geom_rect(data=ba_data,
mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill = BA),
color="black") +
## 下三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,blue))+
scale_fill_continuous(name = "BA",low = nake, high = blue,
limits=c(0.6,1),breaks = seq(0.6,1,0.2)) +
## 神技能,清空aes
new_scale_fill() +
##1.3 draw middle plot_MCC
geom_rect(data=mcc_data,
mapping=aes(xmin=x1, xmax=x2, ymin=y1, ymax=y2,fill =MCC),
color="black") +
## 下三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,green)) +
scale_fill_continuous(name = "MCC",low = nake, high = green,
limits=c(0.3,0.9),breaks = seq(0.3,0.9,0.2)) +
facet_grid(feature_selection ~ ., scales = "free",switch ="y")
p1

p2 <- p1 +labs(x="", y="Feature selection method", title="")+
scale_x_continuous(expand = c(0,0),limits = c(0.99,10.1),breaks = seq(1.5,10,1),
labels = NULL,
sec.axis = sec_axis(trans = ~., breaks = seq(1.5,10,1),
labels = unique(data_3$in_vivo))) +
scale_y_continuous(expand = c(0,0))+
theme(plot.margin = margin(0, 3.3, 0,0, "cm"),
legend.position= "none",
legend.direction= "horizontal",
legend.title = element_text(colour="black", size=13,
face="plain",family="sans",vjust = 0.9),
legend.text = element_text(colour="black", size=13,
face="plain",family="sans"),
legend.background = element_rect(fill="white",
size=0.1, linetype=NULL,
colour ="white"),
#legend.key.size = unit(1, "cm"),
legend.key.width = unit(0.7, "cm"),
legend.key.height=unit(0.4, "cm"),
plot.background = element_rect(fill = "white"),
panel.background = element_rect(fill = "white",
colour = "white",
size = 0.5, linetype = "solid"),
panel.grid = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
axis.title.x = element_text(color="black", size=18, face="plain",family="sans"),
axis.title.y = element_text(color="black", size=20, face="bold",family="sans",angle = 90),
axis.ticks = element_blank(),
axis.text.x.top = element_text(color="black", size=18, face="plain",family="sans",
angle = 45,vjust = 0.005,hjust = 0),
axis.text.y =element_blank(),
strip.text = element_text(size=16, color="black",
face="plain",family="sans"),
strip.background = element_rect(colour="white", fill="white",
size=0.25, linetype="solid"))
p2
#############legend
colnames(auc_data)
## [1] "feature_selection" "category" "in_vivo"
## [4] "x1" "x2" "y1"
## [7] "y2" "AUC-ROC" "label"
legend_auc <- get_legend(ggplot() +
##1.1 draw up plot_AUC
geom_tile(data=auc_data,
mapping=aes(x= in_vivo,y = feature_selection,fill = `AUC-ROC`),
color="black") +
theme(legend.position ="right",
legend.box = "horizontal",
legend.title = element_text(colour="black", size=18,
face="plain",family="sans",vjust = 0.9),
legend.text = element_text(colour="black", size=18,
face="plain",family="sans"),
legend.background = element_rect(fill="white",
size=0.1, linetype=NULL,
colour ="white"),
#legend.key.size = unit(1, "cm"),
legend.key.width = unit(0.5, "cm"),
legend.key.height=unit(0.6, "cm"))+
## 上三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,red)) +
scale_fill_continuous(name = "AUC-ROC",low = nake, high = red,
limits=c(0.6,1),breaks = seq(0.6,1,0.2)))
legend_ba <- get_legend(ggplot() +
##1.1 draw up plot_AUC
geom_tile(data=ba_data,
mapping=aes(x= in_vivo,y = feature_selection,fill = BA),
color="black") +
theme(legend.position ="right",
legend.box = "horizontal",
legend.title = element_text(colour="black", size=18,
face="plain",family="sans",vjust = 0.9),
legend.text = element_text(colour="black", size=18,
face="plain",family="sans"),
legend.background = element_rect(fill="white",
size=0.1, linetype=NULL,
colour ="white"),
#legend.key.size = unit(1, "cm"),
legend.key.width = unit(0.5, "cm"),
legend.key.height=unit(0.6, "cm"))+
## 上三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,red)) +
scale_fill_continuous(name = "BA",low = nake, high = blue,
limits=c(0.6,1),breaks = seq(0.6,1,0.2)))
legend_mcc <- get_legend(ggplot() +
##1.1 draw up plot_AUC
geom_tile(data=mcc_data,
mapping=aes(x= in_vivo,y = feature_selection,fill = MCC),
color="black") +
theme(legend.position ="right",
legend.box = "horizontal",
legend.title = element_text(colour="black", size=18,
face="plain",family="sans",vjust = 0.9),
legend.text = element_text(colour="black", size=18,
face="plain",family="sans"),
legend.background = element_rect(fill="white",
size=0.1, linetype=NULL,
colour ="white"),
#legend.key.size = unit(1, "cm"),
legend.key.width = unit(0.5, "cm"),
legend.key.height=unit(0.6, "cm"))+
## 上三角用表达值来配色
#scale_fill_gradientn(colours = c(nake,red)) +
scale_fill_continuous(name = "MCC",low = nake, high = green,
limits=c(0.3,0.9), breaks = seq(0.3,0.9,0.3)))
library(grid)
grid.draw(legend_auc)

#?grid.draw
p4 <-
ggdraw() +
draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
draw_plot(legend_auc, x = 0.89, y = 0.65, width = 0.1, height = 0.1) +
draw_plot(legend_ba, x = 0.865, y = 0.45, width = 0.1, height = 0.1) +
draw_plot(legend_mcc, x = 0.865, y = 0.25, width = 0.1, height = 0.1)
p4

####output
ggsave(filename = paste0(Sys.Date(),"-merge_heatmap_5.tif"), plot = p4,
device = "tiff", path = dir_path,
scale = 1, compression = "lzw",width =26, height = 26, units = "cm",
dpi = 300, limitsize = TRUE)