1 IDEA

1: Convert 1 line (species) to n lines (individus), with n = max-min+1

2: Use function cut2 in package Hmisc with R to cut by k group.

3: Check value that is cut by cut2

2 LOAD Package

library(stringr)
library(tidyverse)
library(Hmisc) #http://svitsrv25.epfl.ch/R-doc/library/Hmisc/html/cut2.html
#'library(base) with function "cut"
#'https://stat.ethz.ch/R-manual/R-devel/library/base/html/cut.html

3 Create FUNCTION

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function to convert [min;max] to format c(min,max)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.convert.range.to.vector <- function(range){
    range=str_replace(range,"\\[","")
    range=str_replace(range,"\\]","")
    range <-unlist(strsplit(range, ";"))
    return(as.numeric(range))
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will change one line species to n line. With n=max-min+1 
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.convert.sp.to.specimens <- function(data,charac)  {
    list_sp=c()
    list_value=c()
    for (i in 1:length(data[,charac])){
        min_max <- as.character(data[,charac])[i]
        min_max <- b.convert.range.to.vector(min_max)
        #call d is distane
        min_value=min(min_max)
        max_value=max(min_max)
        d <- max_value - min_value
        
        for (j in 0:d){
            list_sp<-append(list_sp,as.character(data$sp[i]))
            list_value<-append(list_value,min_value+j)
        }
        
    }
    df <- data.frame(list_sp,list_value)
    colnames(df)[colnames(df) == 'list_sp'] <- "sp"
    colnames(df)[colnames(df) == 'list_value'] <- charac
    return(df)
} 

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will convert result from "b.convert.sp.to.specimens" toXper format%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.xper.format <-function(result,col){
    list_item<-c()
    list_etats<-c()
    
    result <- unique(result, by = col)
    list_sp <- as.character(unique(result$sp))
    for (spi in list_sp) {
        result2 <- filter(result, sp==spi)
        list_item <- append(list_item, spi)
        list_etats <-append(list_etats,paste(result2[,col], collapse = "&"))
    }
    
    df <- data.frame(list_item, list_etats)
    colnames(df)[colnames(df) == 'list_item'] <- "sp"
    colnames(df)[colnames(df) == 'list_etats'] <- col
    return(df)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will cut all of column in your data.frame by group and return to Xper format %
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.interval.to.class.all <-function(data, group=3){
    list_charactaire <-colnames(data)
    qual_data <- distinct(data,sp)
    for (col in list_charactaire[-1]){
        #col="C9"
        #group=3
        #'convert min-max to specimens
        result <- b.convert.sp.to.specimens(data,col)
        #'cut to interval
        z <- cut2(result[,col], g=group)
        #' copy group to data
        for (i in 1:length(z)){
            result[,col][i] <- as.character(z[i])
        }
        #'b.xper.format
        a <- b.xper.format(result,col)
        
        qual_data <- inner_join(qual_data, a)
    }
    return(qual_data)
}

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#'Function to plot min-max with data.frame have 3 columns "sp","min,"max"  %
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.plot_min_max <- function(data){
    data$min <- as.numeric(data$min)
    data$max <- as.numeric(data$max)
    ggplot(data, aes(sp))+
        geom_linerange(aes(ymin=min,ymax=max),linetype=1,color="blue")+
        geom_point(aes(y=min),size=3,color="red")+
        geom_point(aes(y=max),size=3,color="red")+
        theme_bw()+coord_flip()+
        ylab("min-max")+xlab("species")
}

4 RUN

4.1 Data

#'LOAD data from csv file extract form XPER
cecropia_min_max <- read.csv("min_max.csv", header = T)
datatable(cecropia_min_max, class = "compact")

Remove “[" and “]” in interval

cecropia_min_max <- cecropia_min_max %>%
    lapply(function(x) {gsub("\\[", "", x)})%>%
    lapply(function(x) {gsub("\\]", "", x)})%>%
    as.data.frame()

Sort level of column sp to plot after

cecropia_min_max$sp <-factor(cecropia_min_max$sp,levels(cecropia_min_max$sp)[c(61:1)])

#View
datatable(cecropia_min_max, class = "compact")

4.2 Convert interval to class in data.frame

#'cut all column in data by group = 3
to.class <- b.interval.to.class.all(cecropia_min_max, group = 3)
to.class[] <- lapply(to.class, function(x) {gsub(",", "-", x) })

#we have result
datatable(to.class, class = "compact")

We have:

  • segment is cut at 10 and 12
  • vein is cut at 16 and 27
  • stamin_spike is cut at 16, 34
  • pistil_spike is cut at 5, 8

5 Plot min max

5.1 Subset data

#'SUBSET to 4 dataframe for 4 characters interval to plot min-max after
segment <- cecropia_min_max %>% select(sp, segment) %>%
    separate(segment, into = c("min", "max"), sep = ";")
#View
datatable(segment, class = "compact")
#'the same for the remaining characters
vein <- cecropia_min_max %>% select(sp, vein) %>%
    separate(vein, into = c("min", "max"), sep = ";")

stamin_spike <- cecropia_min_max %>% select(sp, stamin_spike) %>%
    separate(stamin_spike, into = c("min", "max"), sep = ";")

pistil_spike <- cecropia_min_max %>% select(sp, pistil_spike) %>%
    separate(pistil_spike, into = c("min", "max"), sep = ";")

5.2 Plot

Use function b.plot_min_max

#SEGMENT
b.plot_min_max(segment) + geom_hline(yintercept = c(10,12))

#VEIN
b.plot_min_max(vein) + geom_hline(yintercept = c(16,27))

#STAMIN_SPIKE
b.plot_min_max(stamin_spike) + geom_hline(yintercept = c(16,34))

#PISTIL_SPIKE
b.plot_min_max(pistil_spike) + geom_hline(yintercept = c(5,8))

6 Convert for one trait

segment_indi<- b.convert.sp.to.specimens(cecropia_min_max, "segment")
table(cut2(segment_indi$segment,g=4))
## 
## [ 5, 9) [ 9,11) [11,13) [13,24] 
##      85      81      50      58
b.plot_min_max(segment) + geom_hline(yintercept = c(9,11,13))

nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE),  height = c(2,4))
par(mar=c(4.5, 4.5, 1.1, 2.1))
boxplot(segment_indi$segment, 
        horizontal=TRUE,  outline=TRUE, frame=F, col = "#FFA500",cex.axis=1.5)

d <- density(segment_indi$segment)
plot(d, main="")
polygon(d, col="#FFA500", border = "#FFA500")

nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE),  height = c(2,4))
par(mar=c(4.5, 4.5, 1.1, 2.1))
boxplot(segment_indi$segment, 
        horizontal=TRUE,  outline=TRUE, frame=F, col = "#FFA500",cex.axis=1.5)
hist(segment_indi$segment, freq = F, col="#FFA500")
lines(density(segment_indi$segment), col="red", lwd=3)

ggplot(segment_indi, aes(x=segment))+
  geom_density(color="darkblue", fill="lightblue")