Venn’s Diagrams

This essay addresses the use of the VennDiagram package.

Cerrado

Alexandre Marques’ original data from the cerrado, a biome in central Brazil, are shown below:

# Small cerrado inventory, one row per tree species:
#   idt        - row identifier
#   vulgar     - common (vernacular) name
#   cientifico - scientific name
#   bm1, bm2   - TRUE when the species is flagged for community BM1 / BM2
idt <- 1:5
vulgar <- c(
  "açoita-cavalo",
  "amarelinho",
  "amendoim-bravo",
  "angelim-do-cerrado",
  "angico"
)
cientifico <- c(
  "Luehea spp",
  "Plathymenia reticulata",
  "Pterogyne nitens",
  "Andira vermifuga",
  "Anadenanthera spp"
)
bm1 <- c(TRUE, FALSE, FALSE, FALSE, TRUE)
bm2 <- c(FALSE, FALSE, FALSE, TRUE, TRUE)

# Assemble the columns into a data frame and show it.
dados <- data.frame(idt, vulgar, cientifico, bm1, bm2)
dados

##   idt             vulgar             cientifico   bm1   bm2
## 1   1      açoita-cavalo             Luehea spp  TRUE FALSE
## 2   2         amarelinho Plathymenia reticulata FALSE FALSE
## 3   3     amendoim-bravo       Pterogyne nitens FALSE FALSE
## 4   4 angelim-do-cerrado       Andira vermifuga FALSE  TRUE
## 5   5             angico      Anadenanthera spp  TRUE  TRUE

The Processed Data and Diagrams

First we draw the diagram manually, then generate it automatically. After a small worked example, we present the Venn diagrams labelled with the scientific names of the species in the communities BM1, BM2 and Fora (“outside”, i.e. species found in neither BM1 nor BM2).

Manually now:

# Manual version first: the region sizes are typed in by hand as percentages
# of the five species in `dados` (BM1, BM2 and "Fora" each hold 2 of 5 = 40%;
# BM1 and BM2 share 1 of 5 = 20%; "Fora" overlaps neither).
# NOTE(review): no library() call is visible in this document, so the package
# is attached here; drop this line if a hidden setup chunk already does it.
library(VennDiagram)  # also attaches grid, which provides grid.newpage()
grid.newpage()
vv <- draw.triple.venn(
  area1 = 40, area2 = 40, area3 = 40,
  n12 = 20, n23 = 0, n13 = 0, n123 = 0,
  category = c("BM1", "BM2", "Fora"),
  lty = "blank",
  fill = c("blue", "red", "green")
)

Processing data and then automatically generating the Venn diagram in absolute numbers:

# Process the data: scientific names of the species in each community.
# Every filter reads the columns of `dados` itself, so the result never
# depends on the free-standing vectors bm1/bm2 still matching the data frame.
BM1 <- dados$cientifico[dados$bm1]
BM2 <- dados$cientifico[dados$bm2]
# "Fora" (outside): species flagged for neither BM1 nor BM2.
Fora <- dados$cientifico[!dados$bm1 & !dados$bm2]
# Generate the diagram automatically from the three sets; the regions show
# absolute counts. With filename = NULL the result is kept in `w` and drawn
# with grid below.
w <- venn.diagram(
  x = list(BM1 = BM1, BM2 = BM2, Fora = Fora),
  filename = NULL,
  fill = c("red", "blue", "green"),
  alpha = rep(0.4, 3),
  cex = 1.5,
  cat.cex = 1.5
)

# Draw the default plot on a fresh page.
grid.newpage()
grid.draw(w)

The following example, adapted from Stack Overflow, shows how to build a diagram whose regions display item labels:

# Adapted example: three small sets; fuba1 shares no element with the others.
foo1 <- c("a", "b", "c")
baa1 <- c("a", "b", "e", "f", "g")
fuba1 <- c("x", "y", "z", "w")

# Build the diagram; by default each region shows its element count.
v2 <- venn.diagram(
  x = list(foo = foo1, baa = baa1, fuba = fuba1),
  filename = NULL,
  fill = c("red", "blue", "green"),
  alpha = rep(0.5, 3),
  cex = 1.5,
  cat.cex = 1.5
)

# Look at the default plot.
grid.newpage()
grid.draw(v2)

# To find out which element of v2 holds which label, inspect the object.
# names {base}: functions to get or set the names of an object.
# xx <- lapply(v2, names)
# str(xx)
# xx
# The labels are what we are after:
# xxx <- lapply(v2, function(i) i$label)
# str(xxx)
# xxx

# Overwrite the counts with the items themselves, one item per line.
# Elements 7 to 10 were chosen by manually checking the labels.
v2[[7]]$label <- paste(setdiff(foo1, baa1), collapse = "\n")    # foo1 only
v2[[8]]$label <- paste(setdiff(baa1, foo1), collapse = "\n")    # baa1 only
v2[[9]]$label <- paste(intersect(foo1, baa1), collapse = "\n")  # foo1 and baa1
v2[[10]]$label <- paste(fuba1, collapse = "\n")                 # fuba1 (outside)

# Redraw with the item labels.
grid.newpage()
grid.draw(v2)

The Venn Diagram with Item Labels

See this:

# Same three communities as before, with a smaller region font (cex = 0.8)
# so that the species names fit inside the regions.
w <- venn.diagram(
  x = list(BM1 = BM1, BM2 = BM2, Fora = Fora),
  filename = NULL,
  fill = c("red", "blue", "green"),
  alpha = rep(0.4, 3),
  cex = 0.8,
  cat.cex = 2.0
)

# Overwrite the counts with the species names, one per line.
# Elements 7 to 10 were chosen by manually checking the labels.
w[[7]]$label <- paste(setdiff(BM1, BM2), collapse = "\n")    # BM1 only
w[[8]]$label <- paste(setdiff(BM2, BM1), collapse = "\n")    # BM2 only
w[[9]]$label <- paste(intersect(BM1, BM2), collapse = "\n")  # BM1 and BM2
w[[10]]$label <- paste(Fora, collapse = "\n")                # Fora (outside)

# Draw the relabelled diagram.
grid.newpage()
grid.draw(w)

One more example with Alexandre Marques’ data:

# Species (scientific names) listed for the first fragment.
frag1 <- c(
  "Andira vermifuga", "Anadenanthera spp", "Dipteryx alata",
  "Copaifera langsdorffii", "Dimorphandra mollis", "Pterodon spp",
  "Astronium fraxinifolium", "Machaerium acutifolium",
  "Hymenaea stigonocarpa", "Simarouba versicolor", "Eriotheca pubescens",
  "Kielmeyera coriacea", "Caryocar brasiliense", "Magonia pubescens"
)

# Species (scientific names) listed for the second fragment.
frag2 <- c(
  "Pterogyne nitens", "Anadenanthera spp", "Peltophorum dubium",
  "Mabea fistulifera", "Roupala montana", "Cedrella fissilis",
  "Copaifera langsdorffii", "Cecropia spp", "Dimorphandra mollis",
  "Apuleia leiocarpa", "Pouteria torta", "Inga spp",
  "Handroanthus ochraceus", "Handroanthus heptaphyllus",
  "Machaerium opacum", "Machaerium acutifolium", "Hymenaea courbaril",
  "Hymenaea stigonocarpa", "Genipa americana", "Cordia glabrata",
  "Terminalia glabrescens", "Eriotheca pubescens", "Gallesia integrifolia",
  "Qualea parviflora", "Qualea grandiflora", "Tapirira guianensis",
  "Caryocar brasiliense", "Aspidosperma polyneuron", "Xylopia aromatica",
  "Croton urucurana", "Triplaris americana", "Attalea apoda",
  "Cordia trichotoma"
)


# List the members of each set and of their overlap, then print the result.
overlap <- calculate.overlap(x = list(Frag1 = frag1, Frag2 = frag2))
overlap

## $a1
##  [1] "Andira vermifuga"        "Anadenanthera spp"      
##  [3] "Dipteryx alata"          "Copaifera langsdorffii" 
##  [5] "Dimorphandra mollis"     "Pterodon spp"           
##  [7] "Astronium fraxinifolium" "Machaerium acutifolium" 
##  [9] "Hymenaea stigonocarpa"   "Simarouba versicolor"   
## [11] "Eriotheca pubescens"     "Kielmeyera coriacea"    
## [13] "Caryocar brasiliense"    "Magonia pubescens"      
## 
## $a2
##  [1] "Pterogyne nitens"          "Anadenanthera spp"        
##  [3] "Peltophorum dubium"        "Mabea fistulifera"        
##  [5] "Roupala montana"           "Cedrella fissilis"        
##  [7] "Copaifera langsdorffii"    "Cecropia spp"             
##  [9] "Dimorphandra mollis"       "Apuleia leiocarpa"        
## [11] "Pouteria torta"            "Inga spp"                 
## [13] "Handroanthus ochraceus"    "Handroanthus heptaphyllus"
## [15] "Machaerium opacum"         "Machaerium acutifolium"   
## [17] "Hymenaea courbaril"        "Hymenaea stigonocarpa"    
## [19] "Genipa americana"          "Cordia glabrata"          
## [21] "Terminalia glabrescens"    "Eriotheca pubescens"      
## [23] "Gallesia integrifolia"     "Qualea parviflora"        
## [25] "Qualea grandiflora"        "Tapirira guianensis"      
## [27] "Caryocar brasiliense"      "Aspidosperma polyneuron"  
## [29] "Xylopia aromatica"         "Croton urucurana"         
## [31] "Triplaris americana"       "Attalea apoda"            
## [33] "Cordia trichotoma"        
## 
## $a3
## [1] "Anadenanthera spp"      "Copaifera langsdorffii"
## [3] "Dimorphandra mollis"    "Machaerium acutifolium"
## [5] "Hymenaea stigonocarpa"  "Eriotheca pubescens"   
## [7] "Caryocar brasiliense"

# Two-set diagram of the fragments; regions show counts by default.
v2 <- venn.diagram(
  x = list(sl1 = frag1, sl2 = frag2),
  filename = NULL,
  fill = c("red", "blue"),
  alpha = rep(0.5, 2),
  cex = 1.5,
  cat.cex = 1.5
)

# Look at the default plot.
grid.newpage()
grid.draw(v2)

# Final Venn diagram
# Finally, the desired plot: the two fragments with species names as labels.
w <- venn.diagram(
  x = list(SL1 = frag1, SL2 = frag2),
  filename = NULL,
  fill = c("blue", "green"),
  alpha = rep(0.4, 2),
  cex = 0.8,
  cat.cex = 2.0
)

# Overwrite the counts with the species names, one per line.
# Elements 5 to 7 were chosen by manually checking the labels; note that
# element 5 takes the frag2-only names and element 6 the frag1-only names.
w[[6]]$label <- paste(setdiff(frag1, frag2), collapse = "\n")    # frag1 only
w[[5]]$label <- paste(setdiff(frag2, frag1), collapse = "\n")    # frag2 only
w[[7]]$label <- paste(intersect(frag1, frag2), collapse = "\n")  # in both

# Draw the relabelled diagram.
grid.newpage()
grid.draw(w)

The logic behind relabelling the regions is the inclusion–exclusion principle of set theory. Let’s look at an example with three communities and the cardinality of each subset:

\[\#\left(A\cup B\cup C\right)=\#A+\#B+\#C-\#\left(A\cap B\right)-\#\left(A\cap C\right)-\#\left(B\cap C\right)+\#\left(A\cap B\cap C\right)\] where \(\#\) denotes the number of elements (the cardinality) of a set.

More generally:

\[\left|\bigcup_{i=1}^{n}A_{i}\right|=\sum_{i}|A_{i}|-\sum_{i<j}|A_{i}\cap A_{j}|+\sum_{i<j<k}|A_{i}\cap A_{j}\cap A_{k}|-\cdots+\left(-1\right)^{n+1}|A_{1}\cap\cdots\cap A_{n}|\]

# Three example sets; "1" to "6" and "a" belong to all of them.
aa <- c(as.character(1:6), "a", "b", "h", "i", "j", "k", "e", "f", "g")
bb <- c(
  as.character(1:6), "a", "c", "d", "e", "f", "g",
  "r", "s", "t", "u", "v", "x"
)
cc <- c(as.character(1:6), "a", "b", "c", "d", "m", "n", "o", "p", "q")

# Named list of the sets, used as input for the diagram functions below.
x <- list(A = aa, B = bb, C = cc)
# Partition table of the three sets (a data.frame), printed for inspection.
ww <- get.venn.partitions(x)
ww

##       A     B     C   ..set..          ..values.. ..count..
## 1  TRUE  TRUE  TRUE     A∩B∩C 1, 2, 3, 4, 5, 6, a         7
## 2 FALSE  TRUE  TRUE (B∩C)∖(A)                c, d         2
## 3  TRUE FALSE  TRUE (A∩C)∖(B)                   b         1
## 4 FALSE FALSE  TRUE (C)∖(A∪B)       m, n, o, p, q         5
## 5  TRUE  TRUE FALSE (A∩B)∖(C)             e, f, g         3
## 6 FALSE  TRUE FALSE (B)∖(A∪C)    r, s, t, u, v, x         6
## 7  TRUE FALSE FALSE (A)∖(B∪C)          h, i, j, k         4

# Show the structure of the partition table (column types and contents).
str(ww)

## 'data.frame':    7 obs. of  6 variables:
##  $ A         : logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
##  $ B         : logi  TRUE TRUE FALSE FALSE TRUE TRUE ...
##  $ C         : logi  TRUE TRUE TRUE TRUE FALSE FALSE ...
##  $ ..set..   : chr  "A∩B∩C" "(B∩C)∖(A)" "(A∩C)∖(B)" "(C)∖(A∪B)" ...
##  $ ..values..:List of 7
##   ..$ 1: chr  "1" "2" "3" "4" ...
##   ..$ 2: chr  "c" "d"
##   ..$ 3: chr "b"
##   ..$ 4: chr  "m" "n" "o" "p" ...
##   ..$ 5: chr  "e" "f" "g"
##   ..$ 6: chr  "r" "s" "t" "u" ...
##   ..$ 7: chr  "h" "i" "j" "k"
##  $ ..count.. : int  7 2 1 5 3 6 4

# Name of the first partition: the intersection A∩B∩C.
ww[1, "..set.."]

## [1] "A∩B∩C"

# Elements of that intersection, still wrapped in a one-element list.
ww[1, "..values.."]

## $`1`
## [1] "1" "2" "3" "4" "5" "6" "a"

# The same elements extracted as a plain character vector.
ww[[1, "..values.."]]

## [1] "1" "2" "3" "4" "5" "6" "a"

# Experiment with paste: glue the elements into a single string.
paste0(ww[[1, "..values.."]], collapse = "")

## [1] "123456a"

# Alternative way to list the overlaps (kept for reference):
# overlap <- calculate.overlap(x)
# overlap

# Default plot: each region shows its element count.
v3 <- venn.diagram(
  x,
  filename = NULL,
  fill = c("red", "blue", "green"),
  alpha = rep(0.5, 3),
  cex = 1.0,
  cat.cex = 1.5
)
grid.newpage()
grid.draw(v3)

# Overwrite the counts with the elements of each region, glued together
# without a separator. Elements 7 to 13 were chosen by manually checking
# the labels.

# Triple intersection A ∩ B ∩ C, removed from the pairwise overlaps below.
inters <- intersect(aa, intersect(bb, cc))

# A \ (B ∪ C): only in A
v3[[7]]$label <- paste(setdiff(aa, union(bb, cc)), collapse = "")
# (A ∩ B) \ C
v3[[8]]$label <- paste(setdiff(intersect(aa, bb), inters), collapse = "")
# B \ (A ∪ C): only in B
v3[[9]]$label <- paste(setdiff(bb, union(aa, cc)), collapse = "")
# (A ∩ C) \ B
v3[[10]]$label <- paste(setdiff(intersect(aa, cc), inters), collapse = "")
# A ∩ B ∩ C: either `inters` or ww[[1,5]] could be used here
v3[[11]]$label <- paste(ww[[1, 5]], collapse = "")
# (B ∩ C) \ A
v3[[12]]$label <- paste(setdiff(intersect(cc, bb), inters), collapse = "")
# C \ (A ∪ B): only in C
v3[[13]]$label <- paste(setdiff(cc, union(aa, bb)), collapse = "")

# Draw the relabelled diagram.
grid.newpage()
grid.draw(v3)

Thus the most laborious step is finding out which label corresponds to which subset; we then replace the cardinality shown in each region with a label listing the corresponding elements. I would like to use a more automatic method, but I have not found one yet. Has anyone already done this?
………

Venn’s Diagrams

Sérgio Quadros

1 de março de 2017

Cerrado

The Processed Data and Diagrams

The Venn Diagram with Item Labels

More