1 LOAD Package

library(stringr)
library(tidyverse)
library(Hmisc) #http://svitsrv25.epfl.ch/R-doc/library/Hmisc/html/cut2.html
#'library(base) with function "cut"
#'https://stat.ethz.ch/R-manual/R-devel/library/base/html/cut.html

2 Create FUNCTION

#This function to convert [min;max] to format c(min,max)
b.convert.range.to.vector <- function(range){
    range=str_replace(range,"\\[","")
    range=str_replace(range,"\\]","")
    range <-unlist(strsplit(range, ";"))
    return(as.numeric(range))
}  
#This function will change one line species to n line. With n=max-min+1    
b.convert.sp.to.specimens <- function(data,charac)  {
    list_sp=c()
    list_value=c()
    for (i in 1:length(data[,charac])){
        min_max <- as.character(data[,charac])[i]
        min_max <- b.convert.range.to.vector(min_max)
        #call d is distane
        min_value=min(min_max)
        max_value=max(min_max)
        d <- max_value - min_value
        
        for (j in 0:d){
            list_sp<-append(list_sp,as.character(data$sp[i]))
            list_value<-append(list_value,min_value+j)
        }
        
    }
    df <- data.frame(list_sp,list_value)
    colnames(df)[colnames(df) == 'list_sp'] <- "sp"
    colnames(df)[colnames(df) == 'list_value'] <- charac
    return(df)
} 
#This function to convert one line species to 2 line min and max
b.convert.sp.to.min.max <- function(data,charac)  {
    list_sp=c()
    list_value=c()
    for (i in 1:length(data[,charac])){
        min_max <- as.character(data[,charac])[i]
        min_max <- b.convert.range.to.vector(min_max)
        #call d is distane
        min_value=min(min_max)
        max_value=max(min_max)
        
        list_sp<-append(list_sp,paste(as.character(data$sp[i]),"min",sep = "_"))
        list_value<-append(list_value,min_value)
        list_sp<-append(list_sp,paste(as.character(data$sp[i]),"max",sep = "_"))
        list_value<-append(list_value,max_value)
        
    }
    df <- data.frame(list_sp,list_value)
    colnames(df)[colnames(df) == 'list_sp'] <- "sp"
    colnames(df)[colnames(df) == 'list_value'] <- charac
    return(df)
}

#This function will convert all of columns in data.frame
b.convert.sp.to.min.max.all <-function(data){
    list_charactaire <-colnames(data)
    b=b.convert.sp.to.min.max(data,list_charactaire[2])
    for (charac in list_charactaire[-c(1,2)]){
        
        a <- b.convert.sp.to.min.max(data,charac)
        b <- inner_join(b, a)
    }
    return(b)
}

#This function will convert result from "b.convert.sp.to.specimens" toXper format
b.fusse.individu <-function(result,col){
    list_item<-c()
    list_etats<-c()
    
    result <- unique(result, by = col)
    list_sp <- as.character(unique(result$sp))
    for (spi in list_sp) {
        result2 <- filter(result, sp==spi)
        list_item <- append(list_item, spi)
        list_etats <-append(list_etats,paste(result2[,col], collapse = "&"))
    }
    
    df <- data.frame(list_item, list_etats)
    colnames(df)[colnames(df) == 'list_item'] <- "sp"
    colnames(df)[colnames(df) == 'list_etats'] <- col
    return(df)
}

#This function will cut all of column in your data.frame by group and return to Xper format
#============================
b.fusse.individu.all <-function(data, group=3){
    list_charactaire <-colnames(data)
    qual_data <- distinct(data,sp)
    for (col in list_charactaire[-1]){
        #col="C9"
        #group=3
        #'convert min-max to specimens
        result <- b.convert.sp.to.specimens(data,col)
        #'cut to interval
        z <- cut2(result[,col], g=group)
        #' copy group to data
        for (i in 1:length(z)){
            result[,col][i] <- as.character(z[i])
        }
        #'Fusse species
        a <- b.fusse.individu(result,col)
        
        qual_data <- inner_join(qual_data, a)
    }
    return(qual_data)
}
#==============================
#'Function to plot min-max with data.frame have 3 columns "sp","min,"max"
b.plot_min_max <- function(data){
    data$min <- as.numeric(data$min)
    data$max <- as.numeric(data$max)
    ggplot(data, aes(sp))+
        geom_linerange(aes(ymin=min,ymax=max),linetype=1,color="blue")+
        geom_point(aes(y=min),size=3,color="red")+
        geom_point(aes(y=max),size=3,color="red")+
        theme_bw()+coord_flip()+
        ylab("min-max")+xlab("species")
}

3 RUN

3.1 Data

#'LOAD data from csv file extract form XPER
cecropia_min_max <- read.csv("min_max.csv", header = T)
datatable(cecropia_min_max, class = "compact")
#remove  "[" and "]" in interval
cecropia_min_max <- cecropia_min_max %>%
    lapply(function(x) {gsub("\\[", "", x)})%>%
    lapply(function(x) {gsub("\\]", "", x)})%>%
    as.data.frame()
    
#Sort level of column sp to plot apter
cecropia_min_max$sp <-factor(cecropia_min_max$sp,levels(cecropia_min_max$sp)[c(61:1)])

datatable(cecropia_min_max, class = "compact")
#'SUBSET to 4 dataframe for 4 characters interval
segement <- cecropia_min_max %>% select(sp, segment) %>%
    separate(segment, into = c("min", "max"), sep = ";")

datatable(segement, class = "compact")
#'the same for the remaining characters
vein <- cecropia_min_max %>% select(sp, vein) %>%
    separate(vein, into = c("min", "max"), sep = ";")

stamin_spike <- cecropia_min_max %>% select(sp, stamin_spike) %>%
    separate(stamin_spike, into = c("min", "max"), sep = ";")

pistil_spike <- cecropia_min_max %>% select(sp, pistil_spike) %>%
    separate(pistil_spike, into = c("min", "max"), sep = ";")

3.2 Analysis

#'cut all column in data by group = 3
toxper <- b.fusse.individu.all(cecropia_min_max, group = 3)
toxper[] <- lapply(toxper, function(x) {gsub(",", "-", x) })
#we have result
datatable(toxper, class = "compact")
#'Plot min - max
b.plot_min_max(segement) + geom_hline(yintercept = c(10,12))

b.plot_min_max(vein) + geom_hline(yintercept = c(16,27))

b.plot_min_max(stamin_spike) + geom_hline(yintercept = c(16,34))

b.plot_min_max(pistil_spike) + geom_hline(yintercept = c(5,8))