rm(list = ls())
a <- read.csv("up_go_kegg_1.csv",stringsAsFactors = F,header = T)
class(a);dim(a) #[1] 10 186
## [1] "data.frame"
## [1] 10 186
#View(a)
rownames(a) <- a$ID
#View(a)
b <- t(a[,-c(1,2,3)])
b <- data.frame(b)
colnames(b) <- gsub("GO.","GO:",colnames(b))
b_flag <- apply(b, 1, function(x) sum(!is.na(x)))
#View(b)
dim(b)
## [1] 183 10
head(b,2)
## GO:0005924 GO:0005925 GO:0030055 GO:0048841 hsa04014 hsa04150 hsa04360
## ABLIM1 NA NA NA NA NA NA 1.09
## ADAM9 1.12 1.12 1.12 NA NA NA NA
## hsa04520 hsa04722 hsa04810
## ABLIM1 NA NA NA
## ADAM9 NA NA NA
b <- b[,-c(1,3)]
dim(b)
## [1] 183 8
head(b)
## GO:0005925 GO:0048841 hsa04014 hsa04150 hsa04360 hsa04520 hsa04722
## ABLIM1 NA NA NA NA 1.09 NA NA
## ADAM9 1.12 NA NA NA NA NA NA
## AFAP1 1.18 NA NA NA NA NA NA
## AJUBA 1.22 NA NA NA NA NA NA
## ALKBH6 1.08 NA NA NA NA NA NA
## ARHGEF6 NA NA NA NA NA NA NA
## hsa04810
## ABLIM1 NA
## ADAM9 NA
## AFAP1 NA
## AJUBA NA
## ALKBH6 NA
## ARHGEF6 1.06
###sum the intersection number
g <- data.frame(c(1,2,3))
for (i in 1:8) {
for (j in 1:8) {
d <- cbind(b[i],b[j])
e <- apply(d,1,function(x) sum(is.na(x)))
f <- d[which(e == 0),]
g <- cbind(g,data.frame(c(colnames(d),nrow(f))))
}
}
#View(g)
g <- g[,-1]
dim(g)
## [1] 3 64
g[,1:5]
## c.colnames.d...nrow.f.. c.colnames.d...nrow.f...1 c.colnames.d...nrow.f...2
## 1 GO:0005925 GO:0005925 GO:0005925
## 2 GO:0005925 GO:0048841 hsa04014
## 3 80 1 4
## c.colnames.d...nrow.f...3 c.colnames.d...nrow.f...4
## 1 GO:0005925 GO:0005925
## 2 hsa04150 hsa04360
## 3 3 2
h <- t(g)
#View(h)
class(h)
## [1] "matrix" "array"
h <- data.frame(h)
head(h)
## X1 X2 X3
## c.colnames.d...nrow.f.. GO:0005925 GO:0005925 80
## c.colnames.d...nrow.f...1 GO:0005925 GO:0048841 1
## c.colnames.d...nrow.f...2 GO:0005925 hsa04014 4
## c.colnames.d...nrow.f...3 GO:0005925 hsa04150 3
## c.colnames.d...nrow.f...4 GO:0005925 hsa04360 2
## c.colnames.d...nrow.f...5 GO:0005925 hsa04520 5
###remove the self
j <- h[h$X1 != h$X2,]
colnames(j) #[1] "X1" "X2" "X3"
## [1] "X1" "X2" "X3"
#View(j)
#########################
head(j)
## X1 X2 X3
## c.colnames.d...nrow.f...1 GO:0005925 GO:0048841 1
## c.colnames.d...nrow.f...2 GO:0005925 hsa04014 4
## c.colnames.d...nrow.f...3 GO:0005925 hsa04150 3
## c.colnames.d...nrow.f...4 GO:0005925 hsa04360 2
## c.colnames.d...nrow.f...5 GO:0005925 hsa04520 5
## c.colnames.d...nrow.f...6 GO:0005925 hsa04722 3
dim(j) #[1] 56 3
## [1] 56 3
length(unique(j$X1))
## [1] 8
for (i in 1:56) {
j[i,c(1,2)] <- sort(j[i,c(1,2)])
}
j <- j[!duplicated(j),]
dim(j)
## [1] 28 3
#View(j)
###convert to symmetric matrix
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ purrr 0.3.4
## √ tibble 3.0.3 √ dplyr 1.0.1
## √ tidyr 1.1.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
k <- spread(j,key = X2, value = X3)
#View(k)
dim(k) #[1] 7 8
## [1] 7 8
####convert character to numeric
rownames(k) <- k$X1
k <- k[,-1]
k<- apply(k,c(1,2),as.numeric)
class(k) #[1] "data.frame"
## [1] "matrix" "array"
m <- as.matrix(k)
m
## GO:0048841 hsa04014 hsa04150 hsa04360 hsa04520 hsa04722 hsa04810
## GO:0005925 1 4 3 2 5 3 12
## GO:0048841 NA 0 2 7 0 0 0
## hsa04014 NA NA 7 10 4 12 13
## hsa04150 NA NA NA 7 1 6 5
## hsa04360 NA NA NA NA 2 6 8
## hsa04520 NA NA NA NA NA 1 3
## hsa04722 NA NA NA NA NA NA 7
library(circlize)
## ========================================
## circlize version 0.4.10
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
grid.col <- c( "GO:0005925" = "#63B8FF", "GO:0048841" = "#8470FF",
"hsa04014" = "#BDB76B", "hsa04150" = "#BDB76B", "hsa04360" = "#BDB76B", "hsa04520" = "#BDB76B",
"hsa04722" = "#BDB76B", "hsa04810" = "#BDB76B")
col_mat <- rand_color(length(m))
#chordDiagram(m,preAllocateTracks = 1,grid.col = grid.col,col = col_mat)
col = c("CC" = "#63B8FF","BP" = "#8470FF","KEGG pathway" = "#BDB76B")
###################
lwd_n <- matrix(1, nrow = nrow(m),ncol = ncol(m))
lwd_n[which(rownames(m)== "hsa04360"),] <- 2
lwd_n[,which(colnames(m)== "hsa04360")] <- 2
lwd_n[lower.tri(lwd_n,diag = F)] <- NA
###################
border_n <- matrix(NA, nrow = nrow(m),ncol = ncol(m))
border_n [which(rownames(m)== "hsa04360"),] <- "#EE00EE"
border_n [,which(colnames(m)== "hsa04360")] <- "#EE00EE"
border_n [lower.tri(border_n ,diag = F)] <- NA
##############repeat from here
circos.clear()
par(mar = c(0.5, 0.5, 0.5, 0.5), lwd = 0.1, cex = 1.2)
circos.par(start.degree = -16, clock.wise = FALSE)
chordDiagram(m,preAllocateTracks = 0,grid.col = grid.col,
col = col_mat,link.lwd = lwd_n,link.lty = 2,
link.border = border_n,annotationTrackHeight = convert_height(c(4, 3), "mm"))
legend(x = -0.5, y= -1.05, pch = 15, legend = names(col), col = col, box.lty=0, horiz=TRUE,cex = 0.75,text.width = 0.1)
