Re-building UpSetR chart with ggplot/cowplot bricks

Sys.setlocale("LC_CTYPE","russian")
library(data.table)
library(readr)
library(dplyr)
library(stringr)
library(scales)
library(cowplot)

Intro

The idea of UpSet diagrams https://vcg.github.io/upset is brilliant and the R package https://github.com/hms-dbmi/UpSetR/ provides a wide set of option, yet there is always something that is missing. I am a real newbie in programming, so instead of looking into the code of UpSet and doing some magic cloning via Git, I decided to re-build the UpsetR diagram using a basic ggplot/cowplot functionality.

Data

Assume that there are many objects measured with 4 metrics - a, b, c, d.

Let’s generate some data (no matter how weird is my method):

dataw<- data.frame(obj=paste0("object_",1:50),
  a=ceiling(sample(0:1,50, replace=TRUE)*sample(0:50,50, replace=TRUE)),
  b=ceiling(sample(0:1,50, replace=TRUE)*sample(0:100,50, replace=TRUE)),
  c=ceiling(sample(0:1,50, replace=TRUE)*sample(0:50,50, replace=TRUE)),
  d=ceiling(sample(0:1,50, replace=TRUE)*sample(0:100,50, replace=TRUE)))
head(dataw)

##        obj  a  b  c  d
## 1 object_1 45  0 24 81
## 2 object_2 40  0  7 80
## 3 object_3  9  0  0  0
## 4 object_4 37 44 19  0
## 5 object_5  0 90  0  0
## 6 object_6  0  0 48 33

Now let’s tidy up our data:

dataw<- dataw %>% mutate(pp=1) %>%
  ## pp states for a valid (existing) observation, its value is always 1
  mutate(aa=as.integer(ifelse(a==0,0,1))) %>%
  mutate(bb=as.integer(ifelse(b==0,0,1))) %>%
  mutate(cc=as.integer(ifelse(c==0,0,1))) %>%
  mutate(dd=as.integer(ifelse(d==0,0,1))) %>%  
  # and let's code it via p (metric's value>0) and n (metric's value = 0)
  mutate(code=as.character(paste0(pp,aa,bb,cc,dd))) %>% 
  mutate(code=gsub("1","p",code)) %>% 
  mutate(code=gsub("0","n",code))
head(dataw)

##        obj  a  b  c  d pp aa bb cc dd  code
## 1 object_1 45  0 24 81  1  1  0  1  1 ppnpp
## 2 object_2 40  0  7 80  1  1  0  1  1 ppnpp
## 3 object_3  9  0  0  0  1  1  0  0  0 ppnnn
## 4 object_4 37 44 19  0  1  1  1  1  0 ppppn
## 5 object_5  0 90  0  0  1  0  1  0  0 pnpnn
## 6 object_6  0  0 48 33  1  0  0  1  1 pnnpp

I also have to convert the data into narrow format and prepare the dataframes fo visualization. Even though the pipes are very convenient, I do like to produce separate dataframes.

datan<- dataw %>% melt(measure.vars=c("pp","aa","bb","cc","dd"), variable.name="metric_name", value.name="metric")
head(datan)

##        obj  a  b  c  d  code metric_name metric
## 1 object_1 45  0 24 81 ppnpp          pp      1
## 2 object_2 40  0  7 80 ppnpp          pp      1
## 3 object_3  9  0  0  0 ppnnn          pp      1
## 4 object_4 37 44 19  0 ppppn          pp      1
## 5 object_5  0 90  0  0 pnpnn          pp      1
## 6 object_6  0  0 48 33 pnnpp          pp      1

data1<-datan %>% 
  group_by(code, metric_name, metric) %>% summarize(n=n_distinct(obj)) %>% 
  arrange(n)
head(data1)

## # A tibble: 6 x 4
## # Groups:   code, metric_name [6]
##   code  metric_name metric     n
##   <chr> <fct>        <dbl> <int>
## 1 pnnnp pp               1     1
## 2 pnnnp aa               0     1
## 3 pnnnp bb               0     1
## 4 pnnnp cc               0     1
## 5 pnnnp dd               1     1
## 6 pnppn pp               1     1

data2<-datan %>% 
  group_by(metric_name) %>% summarize(n=sum(metric))
head(data2)

## # A tibble: 5 x 2
##   metric_name     n
##   <fct>       <dbl>
## 1 pp             50
## 2 aa             24
## 3 bb             19
## 4 cc             31
## 5 dd             27

data4<-datan %>% 
  group_by(code) %>% summarize(n=n_distinct(obj)) %>% 
  arrange(desc(n))
head(data4)

## # A tibble: 6 x 2
##   code      n
##   <chr> <int>
## 1 ppnpp     7
## 2 pnnpn     6
## 3 pnnpp     6
## 4 pnppp     4
## 5 ppnnn     4
## 6 pppnp     4

data3<-dataw %>% left_join(.,data4)

## Joining, by = "code"

head(data3)

##        obj  a  b  c  d pp aa bb cc dd  code n
## 1 object_1 45  0 24 81  1  1  0  1  1 ppnpp 7
## 2 object_2 40  0  7 80  1  1  0  1  1 ppnpp 7
## 3 object_3  9  0  0  0  1  1  0  0  0 ppnnn 4
## 4 object_4 37 44 19  0  1  1  1  1  0 ppppn 3
## 5 object_5  0 90  0  0  1  0  1  0  0 pnpnn 2
## 6 object_6  0  0 48 33  1  0  0  1  1 pnnpp 6

codes<-data4 %>% arrange(desc(n)) 
scale_p<-as.character(codes$code)

cols <- c("0" = "white", "1" = "dark blue")  

point <- scales::format_format(big.mark = " ", decimal.mark = ".", scientific = FALSE, digits=0)  # cosy wrapper for format

Data

Now we are ready for visualization. There are 8 separate graphs to be later combined with cowplot function.

Chart1. Bar with a total number of objects/observations

emptybar<-ggplot(data3[1,])+
  geom_text(aes(x=-3, y=9, label="TOTAL\nno of objects"), 
            hjust=0, size=8)+
  geom_text(aes(x=-3, y=7.5, label=as.character(point(NROW(data3)))), 
            hjust=0, size=15, color="brown", fontface="bold")+
  scale_y_continuous(limits=c(-1,10),
                     breaks=c(0,1,3,5,7,9),
                     position="right")+
  scale_x_continuous(limits=c(-3,1.5))+  
  theme_minimal()+
  theme(legend.position = "none", 
        axis.title = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
emptybar

Chart2. Totals for intersections

bars_f<-
  ggplot(data1, aes(x=reorder(factor(code),-n), y=n))+
  geom_col(fill="brown", position="dodge")+
  geom_text(aes(label=point(n)),
            size=7,
            position = position_dodge(0.9),
            hjust=0.5, vjust=-0.25)+
  scale_x_discrete(limits=scale_p)+
  scale_y_continuous(expand = expand_scale(mult = c(0, .1)))+
  theme_minimal()+
  theme(legend.position = "none", 
        axis.title = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
bars_f

Chart 3. Totals for metrics

bars_metric_f<-
  ggplot(data2, aes(x=metric_name, y=n))+
  geom_col(fill="dark blue", position="dodge")+
  geom_text(aes(label=point(n)),
            position = position_dodge(0.9),
            size=7, hjust=1.1, vjust=0.5)+
  scale_x_discrete(position="top",
                   limits=c("dd","cc","bb","aa","pp"))+
  scale_y_reverse(labels=scales::format_format(big.mark = " ", decimal.mark = ".", scientific = FALSE, digits=0),expand = expand_scale(mult = c(0.6, 0)))+
  coord_flip()+
  theme_minimal()+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=18),
        panel.grid = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
bars_metric_f

Chart 4. The Intersections itself

dots1_f<-
  ggplot(data1, aes(y=metric_name, x=reorder(factor(code),-n)))+
  geom_point(shape=21, size=6, colour="black", aes(fill=factor(metric)))+
  scale_fill_manual(values = cols)+
  scale_x_discrete(limits=scale_p)+
  scale_y_discrete(limits=c("dd","cc","bb","aa","pp"))+
  theme_minimal()+
  labs(x="", y="")+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
dots1_f

Additional boxplots

Chart 5. Metric 1, right section

box1_f<-
  ggplot(data3, aes(x=reorder(factor(code),-n), y=a))+
  geom_boxplot(fill="coral", alpha=0.9, outlier.alpha = 0)+
  scale_y_continuous(limits=c(-1,max(data3$a)))+
  scale_x_discrete(limits=scale_p)+
  theme_minimal()+
  labs(x="", y="")+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.line = element_line(),
        plot.margin = margin(0,0,0,0,"cm"))
box1_f

Chart 6. Metric 1, left section

box1_e<-
  ggplot(data3, aes(x=1, y=a))+
  geom_boxplot(fill="coral", outlier.alpha = 0)+
  geom_text(aes(x=-3, y=0.9*max(data3$a), label="TOTAL\n(metric a)"), 
            hjust=0, size=8)+
  geom_text(aes(x=-3, y=0.75*max(data3$a), label=as.character(point(sum(data3$a)))), 
            hjust=0, size=15, color="coral", fontface="bold")+
  scale_y_continuous(limits=c(-1,max(data3$a)),
                     breaks=pretty_breaks(n=5),
                     position="right")+

  scale_x_continuous(limits=c(-3,1.5))+  
  theme_minimal()+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=18, hjust=0),
        axis.ticks.y =element_line(),
        axis.line = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
box1_e

Chart 7. Metric 2, right section

box2_f<-
  ggplot(data3, aes(x=reorder(factor(code),-n), y=b))+
  geom_boxplot(fill="green", outlier.alpha = 0)+
  scale_y_continuous(limits=c(-1,max(data3$b)))+
  scale_x_discrete(limits=scale_p)+
  theme_minimal()+
  labs(x="", y="")+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.line = element_line(),
        plot.margin = margin(0,0,0,0,"cm"))
box2_f

Chart 8. Metric 2, left section

box2_e<-ggplot(data3, aes(x=1, y=b))+
  geom_boxplot(fill="green", outlier.alpha = 0)+
  geom_text(aes(x=-3, y=0.9*max(data3$b), label="TOTAL\n(metric b)"), 
            hjust=0, size=8)+
  geom_text(aes(x=-3, y=0.75*max(data3$b), label=as.character(point(sum(data3$b)))), 
            hjust=0, size=15, color="green", fontface="bold")+
  scale_y_continuous(limits=c(-1,max(data3$b)),
                     breaks=pretty_breaks(n=5),
                     position="right")+
  scale_x_continuous(limits=c(-3,1.5))+  
  theme_minimal()+
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=18, hjust=1),
        axis.line.y = element_blank(),
        axis.ticks.y =element_line(), 
        panel.grid = element_blank(),
        plot.margin = margin(0,0,0,0,"cm"))
box2_e

UpSet

So now we can build it

p2s<- plot_grid(emptybar, bars_f, 
            bars_metric_f, dots1_f,
            box1_e,box1_f,
            box2_e, box2_f,
            labels=c("A","","","","B","","C",""),
            label_size = 24,
            ncol=2, align="h", 
            rel_widths = c(1,3.5, 1,3.5), rel_heights = c(2,1,2,2))

p2s

Re-building UpSetR chart with ggplot/cowplot bricks

Lutai Aleksei

28 January 2019

Intro

Data

Data

Chart1. Bar with a total number of objects/observations

Chart2. Totals for intersections

Chart 3. Totals for metrics

Chart 4. The Intersections itself

Additional boxplots

Chart 5. Metric 1, right section

Chart 6. Metric 1, left section

Chart 7. Metric 2, right section

Chart 8. Metric 2, left section

UpSet