############################### input data_1
# Load the drug table `data_1.csv` from the data directory.
# The workspace is deliberately NOT wiped with rm(list = ls()): every object
# used below is assigned by this script, and clearing the global environment
# would destroy unrelated objects when the script is sourced interactively.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"
# Escape the dot and anchor the extension so only real *.csv files are listed.
dir_path_name <- dir(dir_path, pattern = "\\.csv$", full.names = TRUE)
#dir_path_name
############################### merge data
# Match the file name literally (fixed = TRUE) so the "." is not treated as a
# regex wildcard, and fail with a clear message unless exactly one file matches.
data_1_file <- grep("data_1.csv", dir_path_name, value = TRUE, fixed = TRUE)
if (length(data_1_file) != 1L) {
  stop("Expected exactly one 'data_1.csv' in ", dir_path, " but found ",
       length(data_1_file), call. = FALSE)
}
data_1 <- read.csv(data_1_file, header = TRUE, stringsAsFactors = FALSE)
#dim(data_1) #[1] 882   7
head(data_1, 2)
##       Mapping.ID cancer_type                   Sample.Name        LYCHI
## 1 ACGUYXCXAPNIKK       liver               HEXACHLOROPHENE KV4X1G8537UJ
## 2 ACSIXWWBWUQEHA      kidney Clodronic acid, disodium salt 25HNUDAS8UUC
##        NCGC.ID Approval.Status                                 Primary.MOA
## 1 NCGC00091195          US FDA Glucose-6-phosphate dehydrogenase Inhibitor
## 2 NCGC00016141                             ADP/ATP translocase 3 Inhibitor
# Keep at most the first 200 rows. head() returns fewer rows when the table is
# shorter, whereas data_1[1:200, ] would pad the result with all-NA rows.
data_1 <- head(data_1, 200)
#data_1$Pathway <- NULL
#data_1 <- unique(data_1)
#dim(data_1) #[1] 8742    3
# Cross-tabulate cancer type (Var1) against Mapping.ID (Var2); the long-format
# data frame has one row per combination with the number of occurrences in Freq.
pro <- data.frame(table(data_1$cancer_type, data_1$Mapping.ID))
#dim(pro) #[1] 1050    3
#View(pro)
# Replace the cancer-type names with letter codes A, B, C, ...
# The number of labels follows the data instead of being hard-coded to 15, so
# the relabelling no longer errors when the subset holds a different number of
# cancer types.
n_cancer_types <- nlevels(pro$Var1)
if (n_cancer_types > length(LETTERS)) {
  stop("Found ", n_cancer_types, " cancer types but only ", length(LETTERS),
       " letter labels are available", call. = FALSE)
}
pro$Var1 <- factor(pro$Var1, labels = LETTERS[seq_len(n_cancer_types)])
#head(pro)
dat <- pro
colnames(dat)[1] <- "source"
#library(stringr)
# Calling str_to_title() function 
#dat$source <- str_to_title(dat$source) 
#dat$source[dat$source == "Aml"] <- "AML"
#write.csv(dat, paste0(dir_path,Sys.Date(),"-","cancer_type_stat_drug.csv"),row.names = FALSE)
######################################################
head(dat)
##   source           Var2 Freq
## 1      A ACGUYXCXAPNIKK    0
## 2      B ACGUYXCXAPNIKK    0
## 3      C ACGUYXCXAPNIKK    0
## 4      D ACGUYXCXAPNIKK    0
## 5      E ACGUYXCXAPNIKK    0
## 6      F ACGUYXCXAPNIKK    0
library(ggplot2)
# Heatmap of drug presence: one tile column per Mapping.ID (Var2) and one
# facet row per cancer-type letter (source).
#
# Fill is driven by presence (Freq > 0) rather than by the raw count. The
# manual scale below defines exactly two colours and two labels, so a pair
# occurring more than once (Freq >= 2) would otherwise add a third, unstyled
# level and break the colour/label pairing.
# The former `colour = factor(Freq)` mapping is dropped: geom_tile() sets the
# border colour to a constant white, which overrides any mapped colour.
#
# NOTE: `size` for tile borders / rect outlines is deprecated in favour of
# `linewidth` from ggplot2 3.4.0 on; it is kept here so the script still runs
# on older ggplot2 installations.

# Text style shared by the legend, the y-axis labels and the x-axis title.
label_text <- element_text(color = "#004c99", size = 12,
                           face = "plain", family = "sans")

p1 <- ggplot(dat, aes(x = Var2, y = source,
                      fill = factor(as.integer(Freq > 0)))) +
  geom_tile(color = "white", size = 0.6) +
  labs(x = "", y = NULL, title = "") +
  theme(legend.position = "top",
        panel.spacing = unit(0, "cm"),
        legend.text = label_text,
        plot.margin = unit(c(0, 0.5, 0, 0), "cm"),
        legend.key.width = unit(0.05, "cm"),
        legend.key.height = unit(0.2, "cm"),
        legend.key = element_rect(colour = NA, fill = NA),
        legend.background = element_rect(fill = NA,
                                         size = 0.1, linetype = "solid",
                                         colour = "darkblue"),
        axis.ticks = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = label_text,
        axis.title.y = element_blank(),
        axis.title.x = label_text,
        strip.text = element_blank()) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  # One facet row per cancer type; free scales keep only that type's own
  # y level in each row.
  facet_grid(source ~ ., scales = "free") +
  # Fixed limits keep colours and labels aligned with "0"/"1" even when one
  # of the two states does not occur in the data.
  scale_fill_manual(values = c("0" = "gray80", "1" = "darkred"),
                    limits = c("0", "1"),
                    name = "",
                    labels = c("Missing value", "Drug"))

p1

# Export the heatmap `p1` into `dir_path`; both file names are prefixed with
# today's date.

# 1) LZW-compressed TIFF, 30 x 15 cm at 300 dpi.
ggsave(
  filename = paste0(Sys.Date(), "-HM-2.tif"),
  path = dir_path,
  plot = p1,
  device = "tiff",
  width = 30,
  height = 15,
  units = "cm",
  scale = 1,
  dpi = 300,
  limitsize = TRUE,
  compression = "lzw"
)

# 2) PDF, 36 x 20 cm.
ggsave(
  filename = paste0(Sys.Date(), "-DRUG_HM-1.pdf"),
  path = dir_path,
  plot = p1,
  device = "pdf",
  width = 36,
  height = 20,
  units = "cm",
  scale = 1,
  dpi = 300,
  limitsize = TRUE
)