# NOTE(review): rm(list = ls()) wipes every object in the caller's global
# environment. It is kept to preserve the script's behaviour, but running the
# script in a fresh R session is the safer way to get a clean workspace.
rm(list = ls())
############################## input data
# Folder that is searched for the input table; it is also used later as the
# output folder for the figure.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# List the .txt files directly inside dir_path (no recursion). The pattern is
# anchored so that names merely containing "txt" are not listed.
dir_path_name <- list.files(
  path = dir_path, pattern = "\\.txt$",
  full.names = TRUE, recursive = FALSE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\all_gene_cancer_overlap_pathway.txt"
# fixed = TRUE: the file name is a literal string, not a regular expression
# (otherwise "." would match any character).
input_file <- grep(
  "all_gene_cancer_overlap_pathway.txt", dir_path_name,
  value = TRUE, fixed = TRUE
)
# Fail early with a readable message instead of handing zero or several paths
# to read.delim().
if (length(input_file) != 1L) {
  stop(
    "Expected exactly one 'all_gene_cancer_overlap_pathway.txt' in ",
    dir_path, " but found ", length(input_file), ".",
    call. = FALSE
  )
}
# Tab-delimited table with a header row; strings are kept as character.
data_1 <- read.delim(input_file, header = TRUE, stringsAsFactors = FALSE)
# ---- Liver subset and the ten most frequent genes ----
#unique(data_1$cancer_type)
# %in% (rather than ==) so that rows with a missing cancer_type are dropped
# instead of coming back as all-NA rows.
data_li <- data_1[data_1$cancer_type %in% "liver", ]
#unique(data_li$pathway)
# Drop the B-cell receptor pathway rows. A logical mask is used because
# x[-grep(...), ] selects ZERO rows when the pattern matches nothing
# (-integer(0) is an empty index), which would silently empty the table.
data_li <- data_li[
  !grepl("B Cell Receptor Signaling Pathway", data_li$pathway),
]
# Number of rows (i.e. pathway entries) per gene symbol.
data_fre <- data.frame(table(data_li$hgnc_symbol))
# Keep the ten most frequent genes. head() returns fewer rows when fewer than
# ten genes exist, whereas [1:10, ] would pad the result with NA rows.
data_fre <- head(data_fre[order(data_fre$Freq, decreasing = TRUE), ], 10)
colnames(data_fre) <- c("hgnc_symbol", "Freq")
#################################
# Restrict the liver rows to the selected genes and attach their frequency.
data_2 <- merge(data_li, data_fre, by = "hgnc_symbol")
#dim(data_2) #[1] 24 14
#sum(data_fre$Freq) #[1] 31
#length(unique(data_2$hgnc_symbol)) #[1] 10
# Gene-level table: drop the pathway and FDR columns, then collapse the
# resulting duplicate rows.
data_21 <- data_2
data_21$pathway <- data_21$FDR.in.proteomics <- data_21$FDR.in.RNAseq <- NULL
data_21 <- unique(data_21)
# First ten rows by ascending frequency (again without NA padding); only used
# by the optional CSV export below.
data_22 <- head(data_21[order(data_21$Freq), ], 10)
#View(data_22)
#write.csv(data_22, paste0(dir_path,Sys.Date(),"-","10_gene_list.csv"),row.names = F)
# Keep only the gene symbol and pathway columns of data_2.
data_3 <- data_2[c("hgnc_symbol", "pathway")]
###########################################################
# Preview of the first six gene/pathway pairs (console output of the example
# run is reproduced below).
head(data_3, n = 6L)
## hgnc_symbol pathway
## 1 AGPAT5 Metabolism
## 2 ALDH1A3 Metabolism
## 3 ARPC5L Fc gamma receptor-mediated phagocytosis
## 4 HK1 Metabolism
## 5 HK1 Biosynthesis of antibiotics
## 6 HLA-DRA T cell receptor signaling pathway
#View(data_3)
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.4
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Structure of data_3 before the numeric code columns are added.
str(data_3)
## 'data.frame': 24 obs. of 2 variables:
## $ hgnc_symbol: chr "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
## $ pathway : chr "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
# Give every gene and every pathway a numeric code: the position of the label
# among the factor levels of its column, stored as double.
data_3[c("hgnc_symbol_1", "pathway_1")] <- lapply(
  data_3[c("hgnc_symbol", "pathway")],
  function(labels) as.numeric(factor(labels))
)
head(data_3)
## hgnc_symbol pathway hgnc_symbol_1 pathway_1
## 1 AGPAT5 Metabolism 1 8
## 2 ALDH1A3 Metabolism 2 8
## 3 ARPC5L Fc gamma receptor-mediated phagocytosis 3 6
## 4 HK1 Metabolism 4 8
## 5 HK1 Biosynthesis of antibiotics 4 4
## 6 HLA-DRA T cell receptor signaling pathway 5 9
library(yarrr)
## Loading required package: jpeg
## Loading required package: BayesFactor
## Loading required package: coda
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## ========================================
## circlize version 0.4.12
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
##
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
##
## diamonds
# ---- Parallel-coordinates plot: genes (left axis) linked to pathways (right axis) ----

# Colours for the gene lines: yarrr's "basel" palette (printed for reference).
basel_colours <- piratepal(palette = "basel")
basel_colours
## blue1 red green pink orange blue2
## "#0C5BB0FF" "#EE0011FF" "#15983DFF" "#EC579AFF" "#FA6B09FF" "#149BEDFF"
## green2 yellow turquoise poop
## "#A1C720FF" "#FEC10BFF" "#16A08CFF" "#9A703EFF"
str(data_3)
## 'data.frame': 24 obs. of 4 variables:
## $ hgnc_symbol : chr "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
## $ pathway : chr "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
## $ hgnc_symbol_1: num 1 2 3 4 4 5 5 5 6 6 ...
## $ pathway_1 : num 8 8 6 8 4 9 7 5 3 1 ...

# Nothing to draw without at least one gene/pathway pair; stop with a clear
# message rather than failing somewhere inside the plotting code.
if (nrow(data_3) == 0L) {
  stop("data_3 has no gene/pathway pairs to plot.", call. = FALSE)
}

n_genes <- length(unique(data_3$hgnc_symbol))
n_pathways <- length(unique(data_3$pathway))

# Both axes are drawn on one shared y range (scale = "globalminmax" below), so
# the axis with more distinct labels is spread evenly over the range of the
# other one. gene_pos_col / pathway_pos_col name the columns that hold the
# final y positions of each axis.
if (n_genes > n_pathways) {
  # More genes than pathways: spread the genes evenly over [1, n_pathways].
  data_4 <- unique(data_3[, c("hgnc_symbol", "hgnc_symbol_1")])
  data_4 <- data_4[order(data_4$hgnc_symbol_1), ]
  data_4$hgnc_symbol_2 <- seq(1, n_pathways, length.out = n_genes)
  data_3 <- merge(data_3, data_4, by = c("hgnc_symbol", "hgnc_symbol_1"))
  gene_pos_col <- "hgnc_symbol_2"
  pathway_pos_col <- "pathway_1"
} else {
  # At least as many pathways as genes: spread the pathways evenly over
  # [1, n_genes]. Without this branch p1 was never created in this case.
  data_4 <- unique(data_3[, c("pathway", "pathway_1")])
  data_4 <- data_4[order(data_4$pathway_1), ]
  data_4$pathway_2 <- seq(1, n_genes, length.out = n_pathways)
  data_3 <- merge(data_3, data_4, by = c("pathway", "pathway_1"))
  gene_pos_col <- "hgnc_symbol_1"
  pathway_pos_col <- "pathway_2"
}
#view(data_3)
str(data_3)
# Gene symbol as a factor: used as the grouping (colour) column of the plot.
data_3$hgnc_symbol_3 <- factor(data_3$hgnc_symbol)
str(data_3)
# Console output of the two str() calls in the example run
# (10 genes, 9 pathways):
## 'data.frame': 24 obs. of 5 variables:
## $ hgnc_symbol : chr "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
## $ hgnc_symbol_1: num 1 2 3 4 4 5 5 5 6 6 ...
## $ pathway : chr "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
## $ pathway_1 : num 8 8 6 8 4 9 7 5 3 1 ...
## $ hgnc_symbol_2: num 1 1.89 2.78 3.67 3.67 ...
## 'data.frame': 24 obs. of 6 variables:
## $ hgnc_symbol : chr "AGPAT5" "ALDH1A3" "ARPC5L" "HK1" ...
## $ hgnc_symbol_1: num 1 2 3 4 4 5 5 5 6 6 ...
## $ pathway : chr "Metabolism" "Metabolism" "Fc gamma receptor-mediated phagocytosis" "Metabolism" ...
## $ pathway_1 : num 8 8 6 8 4 9 7 5 3 1 ...
## $ hgnc_symbol_2: num 1 1.89 2.78 3.67 3.67 ...
## $ hgnc_symbol_3: Factor w/ 10 levels "AGPAT5","ALDH1A3",..: 1 2 3 4 4 5 5 5 6 6 ...

# One label per gene / per pathway. data_3 has one row per gene-pathway pair,
# so labelling from it directly would draw identical text several times on top
# of itself.
gene_labels <- unique(data.frame(
  y = data_3[[gene_pos_col]],
  label = data_3$hgnc_symbol,
  stringsAsFactors = FALSE
))
pathway_labels <- unique(data.frame(
  y = data_3[[pathway_pos_col]],
  label = data_3$pathway,
  stringsAsFactors = FALSE
))

#plot
#dev.off()
p1 <- ggparcoord(
  data_3[, c(gene_pos_col, pathway_pos_col, "hgnc_symbol_3")],
  columns = 1:2, scale = "globalminmax",
  showPoints = TRUE, boxplot = FALSE,
  splineFactor = TRUE, alphaLines = 1,
  mapping = ggplot2::aes(size = 1),
  groupColumn = 3, title = ""
) +
  scale_color_manual(values = as.character(basel_colours)) +
  # gene symbols, right-aligned just left of the first axis
  geom_text(
    data = gene_labels,
    aes(x = 0.95, y = y, label = label),
    inherit.aes = FALSE, hjust = 1
  ) +
  # optional: remove
  #scale_x_discrete(labels = function(x) c("", x[-1])) +
  # pathway names, left-aligned just right of the second axis
  geom_text(
    data = pathway_labels,
    aes(x = 2 + 0.05, y = y, label = label),
    inherit.aes = FALSE, hjust = 0
  ) +
  theme_void() +
  # hide the legend; "none" is the documented value for this
  theme(legend.position = "none") +
  # extra room on the right for the pathway names
  scale_x_continuous(limits = c(0.6, 3.5))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
p1

################ output
# Save the figure into dir_path as a 15 x 8 cm, 300 dpi TIFF whose file name
# starts with today's date. The plot object is passed explicitly so the saved
# figure does not depend on which ggplot happened to be created or modified
# last (which is what last_plot() returns).
ggsave(
  filename = paste0(Sys.Date(), "-Parallel-2.tiff"),
  plot = p1, device = "tiff", path = dir_path,
  scale = 1, width = 15, height = 8, units = "cm", dpi = 300,
  limitsize = TRUE
)
#?ggparcoord
#40*39/2
#unique(data_3$hgnc_symbol)