# NOTE(review): rm(list = ls()) wipes every object in the calling session's
# global environment. It is kept so the script behaves as before, but running
# the script in a fresh R session is the safer way to get a clean workspace.
rm(list = ls())
############################## input data
# Folder holding the input table; ggsave() at the end of the script writes the
# figure to this same folder.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# List only files whose name ends in ".txt". The previous pattern ".*txt"
# matched any file name that merely contained the letters "txt".
dir_path_name <- list.files(
  dir_path,
  pattern = "\\.txt$",
  full.names = TRUE,
  recursive = FALSE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\all_gene_cancer_overlap_pathway.txt"
# Select the input by exact file name rather than by an unanchored regex, and
# fail with a clear message when it is missing or ambiguous instead of handing
# read.delim() a zero-length or multi-element path.
input_file <- dir_path_name[
  basename(dir_path_name) == "all_gene_cancer_overlap_pathway.txt"
]
if (length(input_file) != 1L) {
  stop(
    "Expected exactly one 'all_gene_cancer_overlap_pathway.txt' in ",
    dir_path, " but found ", length(input_file), ".",
    call. = FALSE
  )
}
# Tab-delimited table with a header row; text columns stay character.
data_1 <- read.delim(input_file, header = TRUE, stringsAsFactors = FALSE)
#unique(data_1$cancer_type)
# Keep only the liver rows. %in% never returns NA, so rows with a missing
# cancer_type are dropped instead of turning into all-NA rows as they do
# with `==`.
data_li <- data_1[data_1$cancer_type %in% "liver", ]
#unique(data_li$pathway)
#dim(data_li)
# Drop the B-cell-receptor pathway rows. A logical mask is used on purpose:
# with `-grep(...)`, a pattern that matches nothing yields `-integer(0)`,
# which selects zero rows and silently empties the whole table.
is_bcr_pathway <- grepl(
  "B Cell Receptor Signaling Pathway",
  data_li$pathway,
  fixed = TRUE
)
data_li <- data_li[!is_bcr_pathway, ]
#dim(data_li)
# Count how many rows each gene symbol has (columns: Var1, Freq).
data_fre <- data.frame(table(data_li$hgnc_symbol))
# Keep the (up to) ten most frequent genes. head() returns fewer rows when
# fewer than ten genes exist, whereas `[1:10, ]` padded the result with NA rows.
data_fre <- head(data_fre[order(data_fre$Freq, decreasing = TRUE), ], 10)
colnames(data_fre) <- c("hgnc_symbol", "Freq")
#View(data_li)
#colnames(data_li)
#head(data_fre)
#head(data_li)
#################################
# Attach the per-gene frequency to every liver row; the inner join on
# hgnc_symbol keeps only the genes present in data_fre.
data_2 <- merge(data_li, data_fre, by = "hgnc_symbol")
# Values recorded for the original input:
#   dim(data_2)                        #[1] 24 14
#   sum(data_fre$Freq)                 #[1] 31
#   length(unique(data_2$hgnc_symbol)) #[1] 10

# Per-gene table: drop the pathway and FDR columns, then collapse the rows
# that become identical once those columns are gone.
drop_cols <- c("pathway", "FDR.in.proteomics", "FDR.in.RNAseq")
data_21 <- data_2[, !(names(data_2) %in% drop_cols), drop = FALSE]
data_21 <- unique(data_21)
# First (up to) ten rows ordered by ascending Freq. head() avoids the NA
# padding that `[1:10, ]` produced when fewer than ten rows exist.
# NOTE(review): the order is ascending here but descending where data_fre is
# built -- confirm which one is intended for the exported gene list.
data_22 <- head(data_21[order(data_21$Freq), ], 10)
#View(data_22)
#write.csv(data_22, paste0(dir_path,Sys.Date(),"-","10_gene_list.csv"),row.names = F)

# Gene/pathway pairs that feed the parallel-coordinates plot.
data_3 <- data_2[, c("hgnc_symbol", "pathway")]
###########################################################
head(data_3, 6)
##   hgnc_symbol                                 pathway
## 1      AGPAT5                              Metabolism
## 2     ALDH1A3                              Metabolism
## 3      ARPC5L Fc gamma receptor-mediated phagocytosis
## 4         HK1                              Metabolism
## 5         HK1             Biosynthesis of antibiotics
## 6     HLA-DRA       T cell receptor signaling pathway
#View(data_3)
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.4
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Show the structure of the gene/pathway pairs before any codes are added.
str(data_3)
## 'data.frame':    24 obs. of  2 variables:
##  $ hgnc_symbol: chr  "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
##  $ pathway    : chr  "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
# Give every distinct gene and every distinct pathway a numeric code: the
# position of the label among the factor levels, stored as double. The codes
# are appended as hgnc_symbol_1 and pathway_1, in that order.
data_3[c("hgnc_symbol_1", "pathway_1")] <- lapply(
  data_3[c("hgnc_symbol", "pathway")],
  function(labels) as.numeric(factor(labels))
)
head(data_3)
##   hgnc_symbol                                 pathway hgnc_symbol_1 pathway_1
## 1      AGPAT5                              Metabolism             1         8
## 2     ALDH1A3                              Metabolism             2         8
## 3      ARPC5L Fc gamma receptor-mediated phagocytosis             3         6
## 4         HK1                              Metabolism             4         8
## 5         HK1             Biosynthesis of antibiotics             4         4
## 6     HLA-DRA       T cell receptor signaling pathway             5         9
library(yarrr)
## Loading required package: jpeg
## Loading required package: BayesFactor
## Loading required package: coda
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## ========================================
## circlize version 0.4.12
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
## 
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
# Print the ten colours of the yarrr "basel" palette used for the gene lines.
piratepal(palette = "basel")
##       blue1         red       green        pink      orange       blue2 
## "#0C5BB0FF" "#EE0011FF" "#15983DFF" "#EC579AFF" "#FA6B09FF" "#149BEDFF" 
##      green2      yellow   turquoise        poop 
## "#A1C720FF" "#FEC10BFF" "#16A08CFF" "#9A703EFF"
str(data_3)
## 'data.frame':    24 obs. of  4 variables:
##  $ hgnc_symbol  : chr  "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
##  $ pathway      : chr  "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
##  $ hgnc_symbol_1: num  1 2 3 4 4 5 5 5 6 6 ...
##  $ pathway_1    : num  8 8 6 8 4 9 7 5 3 1 ...

# ---- Parallel-coordinates plot: genes (left axis) vs pathways (right axis) ----
# Both axes share one y range (scale = "globalminmax"), so the axis with more
# distinct labels is spread evenly over the code range of the other axis.
# Previously only the "more genes than pathways" case was handled and p1 was
# never created otherwise; the else branch mirrors the same logic for pathways.
n_gene <- length(unique(data_3$hgnc_symbol))
n_pathway <- length(unique(data_3$pathway))

if (n_gene > n_pathway) {
  # Squeeze the gene codes into [1, n_pathway]; pathways keep their codes.
  data_4 <- unique(data_3[, c("hgnc_symbol", "hgnc_symbol_1")])
  data_4 <- data_4[order(data_4$hgnc_symbol_1, decreasing = FALSE), ]
  data_4$hgnc_symbol_2 <- seq(1, n_pathway, length.out = n_gene)
  data_3 <- merge(data_3, data_4, by = c("hgnc_symbol", "hgnc_symbol_1"))
  gene_axis <- "hgnc_symbol_2"
  pathway_axis <- "pathway_1"
} else {
  # Squeeze the pathway codes into [1, n_gene]; genes keep their codes.
  data_4 <- unique(data_3[, c("pathway", "pathway_1")])
  data_4 <- data_4[order(data_4$pathway_1, decreasing = FALSE), ]
  data_4$pathway_2 <- seq(1, n_gene, length.out = n_pathway)
  data_3 <- merge(data_3, data_4, by = c("pathway", "pathway_1"))
  gene_axis <- "hgnc_symbol_1"
  pathway_axis <- "pathway_2"
}
str(data_3)
# Grouping factor: one line colour per gene.
data_3$hgnc_symbol_3 <- factor(data_3$hgnc_symbol)
str(data_3)

# One row per distinct label. Passing all of data_3 to geom_text() drew every
# label once per gene/pathway pair, stacking identical text on top of itself.
gene_labels <- unique(data.frame(
  y = data_3[[gene_axis]],
  label = data_3$hgnc_symbol,
  stringsAsFactors = FALSE
))
pathway_labels <- unique(data.frame(
  y = data_3[[pathway_axis]],
  label = data_3$pathway,
  stringsAsFactors = FALSE
))

#plot
#dev.off()
p1 <- ggparcoord(
  data_3[, c(gene_axis, pathway_axis, "hgnc_symbol_3")],
  columns = 1:2, scale = "globalminmax",
  showPoints = TRUE, boxplot = FALSE,
  splineFactor = TRUE, alphaLines = 1,
  mapping = ggplot2::aes(size = 1),
  groupColumn = 3, title = ""
) +
  # The "basel" palette has ten colours, matching the ten genes kept upstream.
  scale_color_manual(values = as.character(piratepal(palette = "basel"))) +
  # Gene symbols, right-aligned just left of the first axis (x = 1).
  geom_text(
    data = gene_labels,
    aes(x = 0.95, y = y, label = label),
    inherit.aes = FALSE, hjust = 1
  ) +
  # Pathway names, left-aligned just right of the second axis (x = 2).
  geom_text(
    data = pathway_labels,
    aes(x = 2 + 0.05, y = y, label = label),
    inherit.aes = FALSE, hjust = 0
  ) +
  theme_void() +
  # "none" is the documented value for hiding the legend ("" is not valid).
  theme(legend.position = "none") +
  # Widen the x range so the text labels on both sides fit inside the panel.
  scale_x_continuous(limits = c(0.6, 3.5))
## 'data.frame':    24 obs. of  5 variables:
##  $ hgnc_symbol  : chr  "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
##  $ hgnc_symbol_1: num  1 2 3 4 4 5 5 5 6 6 ...
##  $ pathway      : chr  "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
##  $ pathway_1    : num  8 8 6 8 4 9 7 5 3 1 ...
##  $ hgnc_symbol_2: num  1 1.89 2.78 3.67 3.67 ...
## 'data.frame':    24 obs. of  6 variables:
##  $ hgnc_symbol  : chr  "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
##  $ hgnc_symbol_1: num  1 2 3 4 4 5 5 5 6 6 ...
##  $ pathway      : chr  "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
##  $ pathway_1    : num  8 8 6 8 4 9 7 5 3 1 ...
##  $ hgnc_symbol_2: num  1 1.89 2.78 3.67 3.67 ...
##  $ hgnc_symbol_3: Factor w/ 10 levels "AGPAT5","ALDH1A3",..: 1 2 3 4 4 5 5 5 6 6 ...
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
# Render the figure. print() also draws it when the script is run through
# source(), where a bare `p1` at top level is not auto-printed.
print(p1)

################output
# Save p1 as a 15 x 8 cm, 300 dpi TIFF in dir_path, prefixed with today's
# date. The plot object is passed explicitly rather than through last_plot(),
# so the saved figure does not depend on ggplot2's "most recent plot" state.
ggsave(
  paste0(Sys.Date(), "-Parallel-2.tiff"),
  plot = p1, device = "tiff", path = dir_path,
  scale = 1, width = 15, height = 8, units = "cm",
  dpi = 300, limitsize = TRUE
)
#?ggparcoord
#40*39/2
#unique(data_3$hgnc_symbol)