# NOTE: this script used to start with `rm(list = ls())`. That call was
# removed: it silently wipes the caller's global environment, and nothing
# below relies on an empty workspace.
############################### input data_1
# Directory holding the input files. The trailing separator is required
# because output paths further down are built with paste0(dir_path, ...).
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"
# Full paths of every non-hidden file in dir_path. The former
# `pattern = ".*."` matched every file name anyway, so it was dropped;
# `TRUE` replaces the reassignable shorthand `T`.
dir_path_name <- dir(dir_path, full.names = TRUE)
#dir_path_name
############################### load proteomics data
# Locate the proteomics table among the files listed in dir_path_name.
# fixed = TRUE matches the literal text "data.csv"; as a regular expression
# the "." would match any character.
pro_file <- grep("data.csv", dir_path_name, value = TRUE, fixed = TRUE)
# Fail with a clear message instead of handing read.csv() zero or several paths.
if (length(pro_file) != 1L) {
  stop("Expected exactly one input file matching 'data.csv', found ",
       length(pro_file), call. = FALSE)
}
pro <- read.csv(pro_file, header = TRUE, stringsAsFactors = FALSE)
#dim(pro) #[1] 23022 5
#View(pro)
#colnames(pro)
# Collapse to one row per cancer type: the "Proteomics" column holds the
# number of input rows observed for that cancer type.
pro <- data.frame(table(pro$cancer_type))
colnames(pro) <- c("cancer_type", "Proteomics")
library(openxlsx)
# Locate the RNA-seq workbook among the files listed in dir_path_name.
# fixed = TRUE matches the file name literally rather than as a regex.
rna_file <- grep("select_gene_set_rna.xlsx", dir_path_name,
                 value = TRUE, fixed = TRUE)
# Ignore the "~$..." lock file Excel creates while the workbook is open.
rna_file <- rna_file[!startsWith(basename(rna_file), "~$")]
# Fail with a clear message instead of handing read.xlsx() zero or several paths.
if (length(rna_file) != 1L) {
  stop("Expected exactly one input file matching 'select_gene_set_rna.xlsx', ",
       "found ", length(rna_file), call. = FALSE)
}
rna <- read.xlsx(rna_file, sheet = 1)
#dim(rna) #[1] 23 4
#View(rna)
# Column 3 becomes the "RNA-seq" count column; only columns 2 and 3 are kept.
# NOTE(review): column 2 is presumably `cancer_type`, since the merge further
# down joins on that name - confirm against the workbook.
colnames(rna)[3] <- "RNA-seq"
rna <- rna[, c(2, 3)]
#################### combine proteomics and RNA-seq counts
#head(pro); head(rna)
# Inner join on cancer_type (merge() default): only cancer types present in
# both tables are kept.
pro_rna <- merge(x = pro, y = rna, by = "cancer_type")
#View(pro_rna)
library(stringr)
# Title-case the labels, then restore the acronym that title-casing turns
# into "Aml".
cancer_labels <- str_to_title(pro_rna$cancer_type)
cancer_labels[cancer_labels == "Aml"] <- "AML"
pro_rna$cancer_type <- cancer_labels
############## wide -> long
library(tidyr)
#colnames(pro_rna)
# Stack every column except cancer_type: the former column name goes to
# `cate`, the count goes to `value`. gather() emits all rows of one source
# column before the next, and later steps rely on that row order.
pro_rna_1 <- gather(pro_rna, key = "cate", value = "value", -cancer_type)
#View(pro_rna_1)
#head(pro_rna_1,2)
#################################################### per-category axis limits
#str(pro_rna_1)
#unique(pro_rna_1$cancer_type)
#unique(pro_rna_1$cate)
library(data.table)
dat <- data.table(pro_rna_1)
# Attach the y-axis bounds for each data type by reference; the plot further
# down feeds y_min / y_max to geom_blank() to fix each facet's range.
dat[cate == "Proteomics", `:=`(y_min = 0, y_max = 2800)]
dat[cate == "RNA-seq", `:=`(y_min = 0, y_max = 13000)]
#view(dat)
#dim(dat)
#head(dat)
########################################################## relabel and rescale
#unique(dat$cancer_type)
# Replace underscore-style identifiers with display labels.
dat$cancer_type <- gsub("Lung_nsc", "NSCLC", dat$cancer_type)
dat$cancer_type <- gsub("Lung_small_cell" , "SCLC", dat$cancer_type)
dat$cancer_type <- gsub("Upper_aerodigestive" , "Upper aerodigestive", dat$cancer_type)
dat$cancer_type <- gsub("Urinary_tract" , "Urinary tract", dat$cancer_type)
#############################################################
#dat$cancer_type <- factor(pro_rna_1$cancer_type)
# Reverse the order of first appearance so the last category becomes the
# first factor level (and therefore the first facet).
dat$cate <- factor(dat$cate, levels = rev(unique(as.character(dat$cate))))
# Counts and the matching upper axis bound are shrunk by the same factor so
# the bars keep their position relative to the axis limit.
# NOTE(review): the reason for the 0.2 factor is not visible here - confirm.
dat$value <- dat$value * 0.2
dat$y_max <- dat$y_max * 0.2
# Replace every cancer type by a letter code (A, B, ...) in order of first
# appearance. The previous `rep(LETTERS[1:16], times = 2)` assumed exactly
# 16 cancer types, 2 data types and a fixed row order; mapping by value gives
# the same codes for that layout and stays correct for any other.
type_index <- match(dat$cancer_type, unique(dat$cancer_type))
if (length(unique(type_index)) > length(LETTERS)) {
  stop("More than ", length(LETTERS),
       " cancer types: not enough single-letter codes", call. = FALSE)
}
dat$cancer_type <- LETTERS[type_index]
library(ggplot2)
dat
## cancer_type cate value y_min y_max
## 1: A Proteomics 503.6 0 560
## 2: B Proteomics 248.6 0 560
## 3: C Proteomics 334.2 0 560
## 4: D Proteomics 300.8 0 560
## 5: E Proteomics 344.8 0 560
## 6: F Proteomics 387.6 0 560
## 7: G Proteomics 492.2 0 560
## 8: H Proteomics 173.6 0 560
## 9: I Proteomics 213.6 0 560
## 10: J Proteomics 259.0 0 560
## 11: K Proteomics 476.4 0 560
## 12: L Proteomics 134.2 0 560
## 13: M Proteomics 205.8 0 560
## 14: N Proteomics 83.2 0 560
## 15: O Proteomics 349.0 0 560
## 16: P Proteomics 97.8 0 560
## 17: A RNA-seq 1577.6 0 2600
## 18: B RNA-seq 2072.2 0 2600
## 19: C RNA-seq 1497.4 0 2600
## 20: D RNA-seq 1832.2 0 2600
## 21: E RNA-seq 1555.4 0 2600
## 22: F RNA-seq 1760.0 0 2600
## 23: G RNA-seq 1428.0 0 2600
## 24: H RNA-seq 1726.0 0 2600
## 25: I RNA-seq 1736.6 0 2600
## 26: J RNA-seq 2514.2 0 2600
## 27: K RNA-seq 1604.2 0 2600
## 28: L RNA-seq 2135.8 0 2600
## 29: M RNA-seq 1814.6 0 2600
## 30: N RNA-seq 883.8 0 2600
## 31: O RNA-seq 1946.2 0 2600
## 32: P RNA-seq 1459.6 0 2600
## cancer_type cate value y_min y_max
# Bar chart of the per-cancer-type counts, one facet row per data type, each
# with its own y axis. Fixes relative to the previous version:
#   * `scale_y_continuous(limits = )` had a dangling empty argument; it is
#     dropped (the default limits apply, exactly as before).
#   * `legend.position = ""` is not a valid position and is rejected by
#     recent ggplot2 releases; "none" states the intent (no legend).
#   * the x-axis label said "Count"; it now names the variable on that axis
#     (axis titles are hidden by the theme, so the figure is unchanged).
#   * the plot is stored in `stat_plot` and passed to ggsave() explicitly
#     instead of relying on last_plot().
# NOTE(review): `size` for lines/borders is deprecated in favour of
# `linewidth` in newer ggplot2; kept to stay compatible with the ggplot2
# version this script was written against.
stat_plot <- ggplot(dat, aes(x = cancer_type, y = value, fill = cate)) +
  geom_bar(position = position_dodge(.9), stat = "identity",
           colour = "black", # Use black outlines
           size = .3, width = 0.75) + # Thinner lines
  xlab("Cancer type") +
  ylab("Count") +
  ggtitle("") +
  scale_fill_manual(name = "Supplement type", # Legend label, use darker colors
                    breaks = c("Proteomics", "RNA-seq"),
                    labels = c("Proteomics", "RNA-seq"),
                    values = c("#CCCCCC", "#FFFFFF")) +
  # No padding, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  theme(panel.spacing = unit(0, "lines"),
        legend.position = "none",
        legend.direction = "vertical",
        strip.placement = "outside",
        strip.background = element_blank(),
        strip.text = element_text(size = 12, color = "black", family = "sans"),
        legend.spacing.x = unit(0.1, 'cm'),
        legend.spacing.y = unit(0.2, 'cm'),
        legend.title.align = 0.1,
        legend.key.size = unit(.5, "cm"),
        legend.text = element_text(colour = "blue", size = 10,
                                   face = "bold"),
        legend.title = element_text(colour = "red", size = 10,
                                    face = "bold"),
        legend.background = element_rect(fill = "lightblue",
                                         size = 2, linetype = "solid",
                                         colour = "blue"),
        legend.key.height = unit(1.2, "line"),
        legend.key.width = unit(1.2, "line"),
        #legend.margin=margin(5,5,5,5),
        legend.justification = c(0, 1),
        legend.box.margin = margin(0, 0, 0, 0),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid = element_blank(),
        axis.text.x = element_text(size = 12, color = "black", family = "sans",
                                   hjust = 1, vjust = 1, angle = 45),
        axis.text.y = element_text(size = 12, color = "black", family = "sans",
                                   vjust = 0.5, hjust = 0.5),
        axis.title = element_blank(),
        axis.ticks = element_line(size = 0.5),
        axis.ticks.length = unit(3, "pt")) +
  # The strips are placed on the left, outside the panel, so they read as
  # y-axis titles.
  facet_wrap(. ~ cate, nrow = 2, scales = "free_y",
             strip.position = "left",
             labeller = as_labeller(c("Proteomics" = "Number of AA",
                                      "RNA-seq" = "Number of BB"))) +
  # Invisible layers that stretch each facet's y range to [y_min, y_max].
  geom_blank(aes(y = y_min)) +
  geom_blank(aes(y = y_max))
# Draw the figure, as the bare top-level ggplot expression did before.
print(stat_plot)

#?facet_wrap()
# Write a dated, LZW-compressed 16 x 16 cm TIFF at 300 dpi into dir_path.
ggsave(filename = paste0(Sys.Date(), "-stat_chart_coord_1.tif"), plot = stat_plot,
       device = "tiff", path = dir_path,
       scale = 1, width = 16, height = 16, units = "cm", dpi = 300,
       limitsize = TRUE, compression = "lzw")
#head(dat)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.8
## v readr 2.1.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
# Back to wide format: one row per cancer type, one column per data type.
# The helper columns y_min / y_max are left out by selecting the three data
# columns by name rather than dropping positions 4 and 5.
# The previous call ended with a stray positional `-1` (copied from the
# gather() call); spread() bound it to `fill`, so a missing cancer-type /
# data-type combination would have been written as -1. It is removed so
# missing cells stay NA.
dat_1 <- spread(dat[, c("cancer_type", "cate", "value"), with = FALSE],
                key = cate, value = value)
#head(dat_1)
# Dated CSV next to the inputs. It holds the values as they stand in `dat`
# at this point (rescaled, with letter-coded cancer types).
write.csv(dat_1, paste0(dir_path, Sys.Date(), "-", "pro_rna_gene_count.csv"), row.names = FALSE)