Here we compare the number of overlapping pathways for each KO against the number of overlapping modules for each KO.
library(ggplot2)
library(dplyr)
library(magrittr)
library(MetamapsDB)
# Open the database connection (presumably the Neo4j instance queried by the
# dbquery() calls in this document -- confirm against MetamapsDB).
# Settings are read from environment variables when they are set, so that
# credentials do not have to live in the source. The fallbacks reproduce the
# original hard-coded values, so behaviour is unchanged when nothing is set.
# NOTE(review): the fallback password is still committed here; rotate it and
# drop the default once NEO4J_PASSWORD is configured everywhere.
connect(
  url = Sys.getenv("NEO4J_URL", unset = "localhost"),
  # Sys.getenv() returns a string; the original passed the port as a number.
  port = as.numeric(Sys.getenv("NEO4J_PORT", unset = "7474")),
  username = Sys.getenv("NEO4J_USERNAME", unset = "neo4j"),
  password = Sys.getenv("NEO4J_PASSWORD", unset = "1231234")
)
# Per-KO count of distinct modules, restricted to KOs that link two different
# modules. The id(m1) > id(m2) filter drops self-pairs and returns each
# unordered module pair once. Rows are sorted from the most-shared KO down.
moduledf <- dbquery("
match
(m1:module)--(k:ko)--(m2:module)
where
id(m1) > id(m2)
return
m1.module AS module1,
k.ko AS ko,
m2.module AS module2
") %>%
  make.data.frame() %>%
  group_by(ko) %>%
  # Pool both ends of every pair so each module is counted once per KO.
  summarise(moduleCount = n_distinct(c(module1, module2))) %>%
  arrange(desc(moduleCount))
# Print the result in compact table form.
as.tbl(moduledf)
## Source: local data frame [419 x 2]
##
## ko moduleCount
## 1 ko:K12340 11
## 2 ko:K00024 9
## 3 ko:K00133 7
## 4 ko:K00134 7
## 5 ko:K00927 7
## 6 ko:K00928 7
## 7 ko:K01595 7
## 8 ko:K03094 7
## 9 ko:K00239 6
## 10 ko:K00240 6
## .. ... ...
# Per-KO count of distinct pathways, restricted to KOs that link two different
# pathways. The id(p1) > id(p2) filter drops self-pairs and returns each
# unordered pathway pair once. Rows are sorted from the most-shared KO down.
pathwayDF <- dbquery("
match
(p1:pathway)--(k:ko)--(p2:pathway)
where
id(p1) > id(p2)
return
p1.pathwayname AS pathway1,
k.ko AS ko,
p2.pathwayname AS pathway2
") %>%
  make.data.frame() %>%
  group_by(ko) %>%
  # Pool both ends of every pair so each pathway is counted once per KO.
  summarise(pathwayCount = n_distinct(c(pathway1, pathway2))) %>%
  arrange(desc(pathwayCount))
# Print the result in compact table form.
as.tbl(pathwayDF)
## Source: local data frame [3,225 x 2]
##
## ko pathwayCount
## 1 ko:K00128 20
## 2 ko:K00626 19
## 3 ko:K01692 19
## 4 ko:K14085 18
## 5 ko:K01782 17
## 6 ko:K01825 17
## 7 ko:K07511 17
## 8 ko:K07515 17
## 9 ko:K14454 17
## 10 ko:K14455 17
## .. ... ...
There exist 419 KOs which overlap 2 or more modules, whereas there exist 3225 KOs which overlap 2 or more pathways.
# Bar chart comparing how many KOs are shared between modules with how many
# are shared between pathways (one bar per grouping level).
# Built with ggplot() + geom_bar(stat = "identity"), the same form as the other
# plots in this document, instead of qplot(..., stat = "identity"): newer
# ggplot2 releases deprecate qplot()'s `stat` argument, so the bar geom would
# fall back to counting rows and reject the supplied y values.
# Naming the columns also gives readable axis and legend titles.
data.frame(
  level = c("module", "pathway"),
  koCount = c(nrow(moduledf), nrow(pathwayDF))
) %>%
  ggplot(aes(x = level, y = koCount, fill = level)) +
  geom_bar(stat = "identity")
# One bar per KO showing its pathway count; reorder() sorts the KOs along the
# x axis by that count.
ggplot(
  pathwayDF,
  aes(x = reorder(ko, pathwayCount), y = pathwayCount)
) +
  geom_bar(stat = "identity")
## Warning: position_stack requires constant width: output may be incorrect
# One bar per KO showing its module count; reorder() sorts the KOs along the
# x axis by that count.
ggplot(
  moduledf,
  aes(x = reorder(ko, moduleCount), y = moduleCount)
) +
  geom_bar(stat = "identity")