FUNCTION

Mergemod Algorithm

Create function b.get.state to get a vector of list states of descriptor

#get list state in column
b.get.state <- function(list,sep)
{
    a <- paste(list,collapse = sep)
    b <- strsplit(a,sep)
    c <- unique(b[[1]])
    c <- c[!c%in%c("NA","UK")]
    return(c)
}

Create function b.mergemode.one.trait to suggest these pair of state can be merge. Note: In this function i add option full_state=FALSE. If it is TRUE then in the result will add these non-merged states by merge with itselt (Ex: “A-A”) to avoid forgetting when we create a new list of states.

library(gtools)
b.mergemode.one.trait <- function(trait,sep="&",full_state=F){
    #get list of state
    list_state <- mixedsort(b.get.state(trait,sep))
    #get number of taxon (n) and state (m)
    n <- length(trait) #number of taxon
    m <- length(list_state) #number of state
    # create vector resultat
    list_pair_merge <- c()
    #Run loop 
    for(i in 1:(m-1)){
        for(j in (i+1):m){
            #test if i and j can be merged
            pair_state <- c(list_state[i], list_state[j])
            res=T
            # for each couple of taxa a and b
            for(a in 1:(n-1)){
                for (b in (a+1):n){
                    #creat a vector state of each taxa a and b
                    sp_a <- unlist(strsplit(trait[a],sep))
                    sp_b <- unlist(strsplit(trait[b],sep))
                    # check length of vector intersect
                    len <- length(intersect(sp_a,sp_b))
                    
                    # If len = 0 => the two taxa are distinguished
                    if (is.na(sp_a)==F && is.na(sp_b)==F && len == 0){
                        if(length(intersect(sp_a, pair_state))>0 & length(intersect(sp_b, pair_state))>0){
                            res=F
                            break
                        }
                    }
                }
               if(res==F){break}
            }
            if(res==T){
                list_pair_merge <- append(list_pair_merge,paste(pair_state,collapse="-"))
            }
            
        }
    }
    if(full_state==T){
        list_state_merge <- b.get.state(list_pair_merge,"-")
        list_diff <- setdiff(list_state,list_state_merge)
        for(state in list_diff){
            list_pair_merge <- append(list_pair_merge, paste0(state,"-",state))
        }
    }
    return(list_pair_merge)
}

FUNCTION PLOT CLIQUE

#'This is function to plot clique
b.visNetwork <- function(g2,largest,number){
    g2$nodes$group<-"Other"
    g2$nodes$group[largest[[number]]] <- "Clique_largest"
    #Plot
    visNetwork(g2$nodes, g2$edges) %>%
        visIgraphLayout() %>%
        visNodes(size = 25, shape = "circle") %>%
        visOptions(#selectedBy="group",
                   highlightNearest = T,
                   nodesIdSelection = F) %>%
        visGroups(groupname = "Clique_largest", color = "orange")%>%
        visInteraction(keyboard = TRUE)%>%
        visLegend()
}

RUN

Load package

library(tidyverse)
library(igraph) #make graph, find largest clique
library(visNetwork)# make interactive graph

Make data exemple

#Data exemple
my_data <- data.frame(state = c("A","B&C","D&E","B&D&F","A&B&E","C&E&F"),stringsAsFactors = FALSE) 
rownames(my_data) <- paste0("sp",1:6)
my_data
##     state
## sp1     A
## sp2   B&C
## sp3   D&E
## sp4 B&D&F
## sp5 A&B&E
## sp6 C&E&F

Merge mode

With option full_state=False

(res_merge <- b.mergemode.one.trait(my_data[,"state"], full_state = F))
## [1] "B-C" "B-F" "C-F" "D-E" "D-F" "E-F"

Plot graph

#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
    separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)

With option full_state=TRUE

(res_merge <- b.mergemode.one.trait(my_data[,"state"], full_state = T))
## [1] "B-C" "B-F" "C-F" "D-E" "D-F" "E-F" "A-A"

Plot graph

#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
    separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)

Test case Unknown convert to all possible

my_data2 <- data.frame(state = c("A",NA,"D&E","B&D&F","A&B&C&D&E&F","C&E&F"),stringsAsFactors = FALSE) 
rownames(my_data2) <- paste0("sp",1:6)
my_data2
##           state
## sp1           A
## sp2        <NA>
## sp3         D&E
## sp4       B&D&F
## sp5 A&B&C&D&E&F
## sp6       C&E&F
(res_merge <- b.mergemode.one.trait(my_data2[,"state"], full_state = T))
##  [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"

Plot graph

#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
    separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)
(sp_a <- unlist(strsplit(my_data2[,"state"][2],"&")))
## [1] NA
(sp_b <- unlist(strsplit(my_data2[,"state"][4],"&")))
## [1] "B" "D" "F"
intersect(sp_a,sp_b)
## character(0)

Test case Unknown convert to NA

my_data3 <- data.frame(state = c("A",NA,"D&E","B&D&F",NA,"C&E&F"),stringsAsFactors = FALSE) 
rownames(my_data3) <- paste0("sp",1:6)
my_data3
##     state
## sp1     A
## sp2  <NA>
## sp3   D&E
## sp4 B&D&F
## sp5  <NA>
## sp6 C&E&F
(res_merge <- b.mergemode.one.trait(my_data3[,"state"], full_state = T))
##  [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"

Plot graph

#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
    separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)

Test case Unknown convert to NA and remove

my_data4 <- data.frame(state = c("A",NA,"D&E","B&D&F",NA,"C&E&F"),stringsAsFactors = FALSE) 
rownames(my_data4) <- paste0("sp",1:6)
my_data4
##     state
## sp1     A
## sp2  <NA>
## sp3   D&E
## sp4 B&D&F
## sp5  <NA>
## sp6 C&E&F
#remove NA value
my_data4 <- na.omit(my_data4)
my_data4
##     state
## sp1     A
## sp3   D&E
## sp4 B&D&F
## sp6 C&E&F
(res_merge <- b.mergemode.one.trait(my_data4[,"state"], full_state = T))
##  [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"

Plot graph

#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
    separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)