Here we compare the number of overlapping pathways for each KO against the number of overlapping modules for each KO.

library(ggplot2)
library(dplyr)
library(magrittr)
library(MetamapsDB)
# Open the database connection used by the dbquery() calls below.
# NOTE(review): connect() is presumably provided by MetamapsDB -- confirm.
# NOTE(review): the username and password are hard-coded in this file;
# consider reading them from environment variables before sharing it.
connect(url="localhost", port=7474, username="neo4j", password="1231234")

Module

# For every KO that links at least two distinct modules, count how many
# distinct modules it touches. The id(m1) > id(m2) filter keeps each unordered
# module pair once and drops self-pairs.
moduledf <- dbquery("
match 
    (m1:module)--(k:ko)--(m2:module)
where 
    id(m1) > id(m2)
return
    m1.module AS module1,
    k.ko      AS ko,
    m2.module AS module2
") %>%
  make.data.frame() %>%
  group_by(ko) %>%
  # Pool both ends of every pair so each module is counted once per KO.
  summarise(moduleCount = n_distinct(c(module1, module2))) %>%
  arrange(desc(moduleCount))

as.tbl(moduledf)
## Source: local data frame [419 x 2]
## 
##           ko moduleCount
## 1  ko:K12340          11
## 2  ko:K00024           9
## 3  ko:K00133           7
## 4  ko:K00134           7
## 5  ko:K00927           7
## 6  ko:K00928           7
## 7  ko:K01595           7
## 8  ko:K03094           7
## 9  ko:K00239           6
## 10 ko:K00240           6
## ..       ...         ...

Pathway

# For every KO that links at least two distinct pathways, count how many
# distinct pathways it touches. The id(p1) > id(p2) filter keeps each unordered
# pathway pair once and drops self-pairs.
pathwayDF <- dbquery("
match 
    (p1:pathway)--(k:ko)--(p2:pathway)
where 
    id(p1) > id(p2)
return
    p1.pathwayname AS pathway1,
    k.ko      AS ko,
    p2.pathwayname AS pathway2
") %>%
  make.data.frame() %>%
  group_by(ko) %>%
  # Pool both ends of every pair so each pathway is counted once per KO.
  summarise(pathwayCount = n_distinct(c(pathway1, pathway2))) %>%
  arrange(desc(pathwayCount))

as.tbl(pathwayDF)
## Source: local data frame [3,225 x 2]
## 
##           ko pathwayCount
## 1  ko:K00128           20
## 2  ko:K00626           19
## 3  ko:K01692           19
## 4  ko:K14085           18
## 5  ko:K01782           17
## 6  ko:K01825           17
## 7  ko:K07511           17
## 8  ko:K07515           17
## 9  ko:K14454           17
## 10 ko:K14455           17
## ..       ...          ...

There are 419 KOs which overlap 2 or more modules, whereas there are 3,225 KOs which overlap 2 or more pathways.

# Bar chart comparing how many KOs overlap 2+ modules vs. 2+ pathways.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"): qplot() no longer accepts a `stat` argument
# (so the bars would fall back to counting rows and reject the supplied y),
# and this form matches the other plots in this file.
ggplot(
  data.frame(
    type = c("module", "pathway"),
    kos  = c(nrow(moduledf), nrow(pathwayDF))
  ),
  aes(x = type, y = kos, fill = type)
) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-1

# One bar per KO showing its pathwayCount; reorder() sorts the KOs along the
# x axis by ascending pathwayCount.
ggplot(pathwayDF, aes(x = reorder(ko, pathwayCount), y = pathwayCount)) +
  geom_bar(stat = "identity")
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk plot

# One bar per KO showing its moduleCount; reorder() sorts the KOs along the
# x axis by ascending moduleCount.
ggplot(moduledf, aes(x = reorder(ko, moduleCount), y = moduleCount)) +
  geom_bar(stat = "identity")

plot of chunk plot