FUNCTION
Mergemod Algorithm
Create function b.get.state to get a vector of list states of descriptor
#get list state in column
b.get.state <- function(list,sep)
{
a <- paste(list,collapse = sep)
b <- strsplit(a,sep)
c <- unique(b[[1]])
c <- c[!c%in%c("NA","UK")]
return(c)
}Create function b.mergemode.one.trait to suggest these pair of state can be merge. Note: In this function i add option full_state=FALSE. If it is TRUE then in the result will add these non-merged states by merge with itselt (Ex: “A-A”) to avoid forgetting when we create a new list of states.
library(gtools)
b.mergemode.one.trait <- function(trait,sep="&",full_state=F){
#get list of state
list_state <- mixedsort(b.get.state(trait,sep))
#get number of taxon (n) and state (m)
n <- length(trait) #number of taxon
m <- length(list_state) #number of state
# create vector resultat
list_pair_merge <- c()
#Run loop
for(i in 1:(m-1)){
for(j in (i+1):m){
#test if i and j can be merged
pair_state <- c(list_state[i], list_state[j])
res=T
# for each couple of taxa a and b
for(a in 1:(n-1)){
for (b in (a+1):n){
#creat a vector state of each taxa a and b
sp_a <- unlist(strsplit(trait[a],sep))
sp_b <- unlist(strsplit(trait[b],sep))
# check length of vector intersect
len <- length(intersect(sp_a,sp_b))
# If len = 0 => the two taxa are distinguished
if (is.na(sp_a)==F && is.na(sp_b)==F && len == 0){
if(length(intersect(sp_a, pair_state))>0 & length(intersect(sp_b, pair_state))>0){
res=F
break
}
}
}
if(res==F){break}
}
if(res==T){
list_pair_merge <- append(list_pair_merge,paste(pair_state,collapse="-"))
}
}
}
if(full_state==T){
list_state_merge <- b.get.state(list_pair_merge,"-")
list_diff <- setdiff(list_state,list_state_merge)
for(state in list_diff){
list_pair_merge <- append(list_pair_merge, paste0(state,"-",state))
}
}
return(list_pair_merge)
}FUNCTION PLOT CLIQUE
#'This is function to plot clique
b.visNetwork <- function(g2,largest,number){
g2$nodes$group<-"Other"
g2$nodes$group[largest[[number]]] <- "Clique_largest"
#Plot
visNetwork(g2$nodes, g2$edges) %>%
visIgraphLayout() %>%
visNodes(size = 25, shape = "circle") %>%
visOptions(#selectedBy="group",
highlightNearest = T,
nodesIdSelection = F) %>%
visGroups(groupname = "Clique_largest", color = "orange")%>%
visInteraction(keyboard = TRUE)%>%
visLegend()
}RUN
Load package
library(tidyverse)
library(igraph) #make graph, find largest clique
library(visNetwork)# make interactive graphMake data exemple
#Data exemple
my_data <- data.frame(state = c("A","B&C","D&E","B&D&F","A&B&E","C&E&F"),stringsAsFactors = FALSE)
rownames(my_data) <- paste0("sp",1:6)
my_data## state
## sp1 A
## sp2 B&C
## sp3 D&E
## sp4 B&D&F
## sp5 A&B&E
## sp6 C&E&F
Merge mode
With option full_state=False
(res_merge <- b.mergemode.one.trait(my_data[,"state"], full_state = F))## [1] "B-C" "B-F" "C-F" "D-E" "D-F" "E-F"
Plot graph
#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)With option full_state=TRUE
(res_merge <- b.mergemode.one.trait(my_data[,"state"], full_state = T))## [1] "B-C" "B-F" "C-F" "D-E" "D-F" "E-F" "A-A"
Plot graph
#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)Test case Unknown convert to all possible
my_data2 <- data.frame(state = c("A",NA,"D&E","B&D&F","A&B&C&D&E&F","C&E&F"),stringsAsFactors = FALSE)
rownames(my_data2) <- paste0("sp",1:6)
my_data2## state
## sp1 A
## sp2 <NA>
## sp3 D&E
## sp4 B&D&F
## sp5 A&B&C&D&E&F
## sp6 C&E&F
(res_merge <- b.mergemode.one.trait(my_data2[,"state"], full_state = T))## [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"
Plot graph
#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)(sp_a <- unlist(strsplit(my_data2[,"state"][2],"&")))## [1] NA
(sp_b <- unlist(strsplit(my_data2[,"state"][4],"&")))## [1] "B" "D" "F"
intersect(sp_a,sp_b)## character(0)
Test case Unknown convert to NA
my_data3 <- data.frame(state = c("A",NA,"D&E","B&D&F",NA,"C&E&F"),stringsAsFactors = FALSE)
rownames(my_data3) <- paste0("sp",1:6)
my_data3## state
## sp1 A
## sp2 <NA>
## sp3 D&E
## sp4 B&D&F
## sp5 <NA>
## sp6 C&E&F
(res_merge <- b.mergemode.one.trait(my_data3[,"state"], full_state = T))## [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"
Plot graph
#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)Test case Unknown convert to NA and remove
my_data4 <- data.frame(state = c("A",NA,"D&E","B&D&F",NA,"C&E&F"),stringsAsFactors = FALSE)
rownames(my_data4) <- paste0("sp",1:6)
my_data4## state
## sp1 A
## sp2 <NA>
## sp3 D&E
## sp4 B&D&F
## sp5 <NA>
## sp6 C&E&F
#remove NA value
my_data4 <- na.omit(my_data4)
my_data4## state
## sp1 A
## sp3 D&E
## sp4 B&D&F
## sp6 C&E&F
(res_merge <- b.mergemode.one.trait(my_data4[,"state"], full_state = T))## [1] "B-C" "B-D" "B-E" "B-F" "C-D" "C-E" "C-F" "D-E" "D-F" "E-F" "A-A"
Plot graph
#Convert res_merge to dataframe for igraph
D <- data.frame(value=res_merge)%>%
separate(value, into = c("S1", "S2"), sep = "-")
#Make with igraph
g1 <- graph.data.frame(D, directed = F)
#'Find largest cliques
largest <- largest.cliques(g1)
#convert g1 to Visnetwork
g2 <- toVisNetworkData(g1)
#plot first group in largest
b.visNetwork(g2,largest,1)