# NOTE: this script used to start with rm(list = ls()). That call was removed:
# it silently deletes whatever the caller has in the global environment and
# still does not give a clean session (attached packages and options remain).
# Restart R when a fresh session is needed.

# ---- Input data ----
# Directory holding the input files. The figure and the CSV produced further
# down are written here as well, and the CSV path is built by pasting a file
# name straight onto this string, so the trailing separator is required.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"
# Full paths of every non-hidden entry in the input directory. The former
# pattern ".*." matched any non-empty name, so no pattern is needed.
dir_path_name <- list.files(dir_path, full.names = TRUE)
#dir_path_name
###############################load the proteomics and RNA-seq tables (merged further down)
# ---- Proteomics: number of rows per cancer type ----
# Locate the proteomics table among the listed files. fixed = TRUE makes the
# "." in the file name a literal dot instead of a regex wildcard.
pro_file <- grep("data.csv", dir_path_name, value = TRUE, fixed = TRUE)
# Fail early with a readable message: read.csv() needs exactly one path.
if (length(pro_file) != 1L) {
  stop("Expected exactly one file matching 'data.csv', found ",
       length(pro_file), call. = FALSE)
}
pro <- read.csv(pro_file, header = TRUE, stringsAsFactors = FALSE)
# Collapse the raw table to one row per cancer type; the number of raw rows
# for each type becomes the "Proteomics" column.
pro <- data.frame(table(pro$cancer_type))
colnames(pro)[1] <- "cancer_type"
colnames(pro)[2] <- "Proteomics"
library(openxlsx)
# ---- RNA-seq: per-cancer-type values from the workbook ----
# Locate the workbook among the listed files (literal match, not a regex).
rna_file <- grep("select_gene_set_rna.xlsx", dir_path_name,
                 value = TRUE, fixed = TRUE)
# Fail early with a readable message when the lookup is ambiguous or empty.
if (length(rna_file) != 1L) {
  stop("Expected exactly one file matching 'select_gene_set_rna.xlsx', found ",
       length(rna_file), call. = FALSE)
}
rna <- read.xlsx(rna_file, sheet = 1)
# Column 3 holds the RNA-seq values; rename it so it becomes the category
# label once the table is reshaped to long format.
colnames(rna)[3] <- "RNA-seq"
# Keep only columns 2 and 3. Column 2 must be named `cancer_type`, because the
# merge further down joins on that name.
rna <- rna[, c(2, 3)]
####################
#head(pro); head(rna)
# ---- Combine both tables and reshape to long format ----
library(stringr)
library(tidyr)
# Join on cancer type; with merge()'s defaults only types present in both
# tables are kept.
pro_rna <- merge(x = pro, y = rna, by = "cancer_type")
# Title-case the labels, then restore the one acronym that title-casing
# turned into "Aml".
type_label <- str_to_title(pro_rna$cancer_type)
pro_rna$cancer_type <- replace(type_label, type_label == "Aml", "AML")
# Stack every column except the first into key/value pairs: `cate` holds the
# former column name, `value` the number. Rows come out grouped by category.
pro_rna_1 <- gather(data = pro_rna, key = "cate", value = "value", -1)
#View(pro_rna_1)
#head(pro_rna_1,2)
####################################################
#str(pro_rna_1)
#unique(pro_rna_1$cancer_type)
#unique(pro_rna_1$cate)
library(data.table)
# Work on a data.table so the columns below can be added by reference.
dat <- data.table(pro_rna_1)
# Per-category y-axis bounds. The plot draws them as invisible points so that
# each facet's free y scale spans exactly this range.
dat[cate == "Proteomics", `:=`(y_min = 0, y_max = 2800)]
dat[cate == "RNA-seq", `:=`(y_min = 0, y_max = 13000)]
#view(dat)
#dim(dat)
#head(dat)
##########################################################
#unique(dat$cancer_type)
# ---- Tidy labels, order the panels, rescale ----
# Display names for the cancer types whose raw labels are abbreviations or
# contain underscores (pattern -> replacement, applied in this order).
label_fixes <- c(
  "Lung_nsc" = "NSCLC",
  "Lung_small_cell" = "SCLC",
  "Upper_aerodigestive" = "Upper aerodigestive",
  "Urinary_tract" = "Urinary tract"
)
for (raw_label in names(label_fixes)) {
  dat$cancer_type <- gsub(raw_label, label_fixes[[raw_label]], dat$cancer_type)
}
# Factor levels are the two categories in the reverse of their order of first
# appearance; facet order follows the levels.
cate_seen <- unique(as.character(dat$cate))
dat$cate <- factor(dat$cate, levels = cate_seen[2:1])
# Shrink the values and the upper axis bounds by the same factor so the bars
# keep their position relative to the panel height.
shrink <- 0.2
dat$value <- dat$value * shrink
dat$y_max <- dat$y_max * shrink
# Replace the cancer-type names with the codes A-P. This overwrites the
# display names set above and relies on the table holding exactly 16 types,
# stacked once per category in the same order.
# NOTE(review): looks like deliberate anonymisation - confirm before removing.
dat$cancer_type <- rep(LETTERS[1:16], times = 2)
library(ggplot2)
# Print the prepared long-format table; the `##` lines that follow are the
# console output captured when the script was run.
dat
##     cancer_type       cate  value y_min y_max
##  1:           A Proteomics  503.6     0   560
##  2:           B Proteomics  248.6     0   560
##  3:           C Proteomics  334.2     0   560
##  4:           D Proteomics  300.8     0   560
##  5:           E Proteomics  344.8     0   560
##  6:           F Proteomics  387.6     0   560
##  7:           G Proteomics  492.2     0   560
##  8:           H Proteomics  173.6     0   560
##  9:           I Proteomics  213.6     0   560
## 10:           J Proteomics  259.0     0   560
## 11:           K Proteomics  476.4     0   560
## 12:           L Proteomics  134.2     0   560
## 13:           M Proteomics  205.8     0   560
## 14:           N Proteomics   83.2     0   560
## 15:           O Proteomics  349.0     0   560
## 16:           P Proteomics   97.8     0   560
## 17:           A    RNA-seq 1577.6     0  2600
## 18:           B    RNA-seq 2072.2     0  2600
## 19:           C    RNA-seq 1497.4     0  2600
## 20:           D    RNA-seq 1832.2     0  2600
## 21:           E    RNA-seq 1555.4     0  2600
## 22:           F    RNA-seq 1760.0     0  2600
## 23:           G    RNA-seq 1428.0     0  2600
## 24:           H    RNA-seq 1726.0     0  2600
## 25:           I    RNA-seq 1736.6     0  2600
## 26:           J    RNA-seq 2514.2     0  2600
## 27:           K    RNA-seq 1604.2     0  2600
## 28:           L    RNA-seq 2135.8     0  2600
## 29:           M    RNA-seq 1814.6     0  2600
## 30:           N    RNA-seq  883.8     0  2600
## 31:           O    RNA-seq 1946.2     0  2600
## 32:           P    RNA-seq 1459.6     0  2600
##     cancer_type       cate  value y_min y_max
# ---- Two-panel bar chart ----
# One panel per data source (facet on `cate`), each with its own y axis.
# Changes relative to the previous version of this block:
#   * legend.position is "none", the documented way to hide the legend; the
#     empty string used before is not a documented value and may be rejected
#     by newer ggplot2 releases.
#   * The dangling, empty `limits = ` argument of scale_y_continuous() is
#     dropped; being empty, it already fell back to the default limits.
#   * The x-axis label no longer duplicates "Count" (both axis titles are
#     hidden by the theme, so the rendered figure is unaffected).
#   * The plot is stored and handed to ggsave() explicitly instead of being
#     recovered through last_plot().
# `size` is kept for line widths so the code still runs on the ggplot2 version
# this script was written against; newer releases prefer `linewidth`.
stat_plot <- ggplot(dat, aes(x = cancer_type, y = value, fill = cate)) +
  geom_bar(
    position = position_dodge(.9), stat = "identity",
    colour = "black", # black bar outlines
    size = .3, width = 0.75 # thin outlines
  ) +
  labs(x = "Cancer type", y = "Count", title = "") +
  scale_fill_manual(
    name = "Supplement type",
    breaks = c("Proteomics", "RNA-seq"),
    labels = c("Proteomics", "RNA-seq"),
    values = c("#CCCCCC", "#FFFFFF")
  ) +
  # No padding, so the bars sit directly on the panel's lower border.
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    panel.spacing = unit(0, "lines"),
    # The legend is hidden; the legend.* settings below only take effect if
    # it is switched back on.
    legend.position = "none",
    legend.direction = "vertical",
    # Strips sit outside the axis on the left and act as y-axis titles.
    strip.placement = "outside",
    strip.background = element_blank(),
    strip.text = element_text(size = 12, color = "black", family = "sans"),
    legend.spacing.x = unit(0.1, "cm"),
    legend.spacing.y = unit(0.2, "cm"),
    legend.title.align = 0.1,
    legend.key.size = unit(.5, "cm"),
    legend.text = element_text(colour = "blue", size = 10, face = "bold"),
    legend.title = element_text(colour = "red", size = 10, face = "bold"),
    legend.background = element_rect(
      fill = "lightblue", size = 2, linetype = "solid", colour = "blue"
    ),
    legend.key.height = unit(1.2, "line"),
    legend.key.width = unit(1.2, "line"),
    legend.justification = c(0, 1),
    legend.box.margin = margin(0, 0, 0, 0),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    panel.grid = element_blank(),
    axis.text.x = element_text(
      size = 12, color = "black", family = "sans",
      hjust = 1, vjust = 1, angle = 45
    ),
    axis.text.y = element_text(
      size = 12, color = "black", family = "sans",
      vjust = 0.5, hjust = 0.5
    ),
    axis.title = element_blank(),
    axis.ticks = element_line(size = 0.5),
    axis.ticks.length = unit(3, "pt")
  ) +
  facet_wrap(. ~ cate,
    nrow = 2, scales = "free_y",
    strip.position = "left",
    labeller = as_labeller(c(
      "Proteomics" = "Number of AA",
      "RNA-seq" = "Number of BB"
    ))
  ) +
  # Invisible layers that stretch each facet's free y scale to the
  # per-category bounds stored in y_min / y_max.
  geom_blank(aes(y = y_min)) +
  geom_blank(aes(y = y_max))

# Draw the figure (an assignment does not auto-print).
print(stat_plot)

# Write the figure next to the input data as a date-stamped,
# LZW-compressed TIFF.
ggsave(
  filename = paste0(Sys.Date(), "-stat_chart_coord_1.tif"), plot = stat_plot,
  device = "tiff", path = dir_path,
  scale = 1, width = 16, height = 16, units = "cm", dpi = 300,
  limitsize = TRUE, compression = "lzw"
)
#head(dat)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.8
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
# Wide table for export: one row per cancer type, one column per category.
# Columns are picked by name rather than by position, and the stray trailing
# `-1` is gone: spread() matched it positionally to `fill`, so any missing
# type/category combination would have been written out as -1 instead of NA.
dat_1 <- spread(dat[, c("cancer_type", "cate", "value")],
                key = cate, value = value)
# Date-stamped CSV in the input directory (dir_path ends with a separator).
write.csv(dat_1, paste0(dir_path, Sys.Date(), "-", "pro_rna_gene_count.csv"),
          row.names = FALSE)