1: Convert 1 line (species) to n lines (individus), with n = max-min+1
2: Use function cut2 in package Hmisc with R to cut by k group.
3: Check value that is cut by cut2
library(stringr)
library(tidyverse)
library(Hmisc) #http://svitsrv25.epfl.ch/R-doc/library/Hmisc/html/cut2.html
#'library(base) with function "cut"
#'https://stat.ethz.ch/R-manual/R-devel/library/base/html/cut.html#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function to convert [min;max] to format c(min,max)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.convert.range.to.vector <- function(range){
range=str_replace(range,"\\[","")
range=str_replace(range,"\\]","")
range <-unlist(strsplit(range, ";"))
return(as.numeric(range))
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will change one line species to n line. With n=max-min+1
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.convert.sp.to.specimens <- function(data,charac) {
list_sp=c()
list_value=c()
for (i in 1:length(data[,charac])){
min_max <- as.character(data[,charac])[i]
min_max <- b.convert.range.to.vector(min_max)
#call d is distane
min_value=min(min_max)
max_value=max(min_max)
d <- max_value - min_value
for (j in 0:d){
list_sp<-append(list_sp,as.character(data$sp[i]))
list_value<-append(list_value,min_value+j)
}
}
df <- data.frame(list_sp,list_value)
colnames(df)[colnames(df) == 'list_sp'] <- "sp"
colnames(df)[colnames(df) == 'list_value'] <- charac
return(df)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will convert result from "b.convert.sp.to.specimens" toXper format%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.xper.format <-function(result,col){
list_item<-c()
list_etats<-c()
result <- unique(result, by = col)
list_sp <- as.character(unique(result$sp))
for (spi in list_sp) {
result2 <- filter(result, sp==spi)
list_item <- append(list_item, spi)
list_etats <-append(list_etats,paste(result2[,col], collapse = "&"))
}
df <- data.frame(list_item, list_etats)
colnames(df)[colnames(df) == 'list_item'] <- "sp"
colnames(df)[colnames(df) == 'list_etats'] <- col
return(df)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#This function will cut all of column in your data.frame by group and return to Xper format %
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.interval.to.class.all <-function(data, group=3){
list_charactaire <-colnames(data)
qual_data <- distinct(data,sp)
for (col in list_charactaire[-1]){
#col="C9"
#group=3
#'convert min-max to specimens
result <- b.convert.sp.to.specimens(data,col)
#'cut to interval
z <- cut2(result[,col], g=group)
#' copy group to data
for (i in 1:length(z)){
result[,col][i] <- as.character(z[i])
}
#'b.xper.format
a <- b.xper.format(result,col)
qual_data <- inner_join(qual_data, a)
}
return(qual_data)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#'Function to plot min-max with data.frame have 3 columns "sp","min,"max" %
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
b.plot_min_max <- function(data){
data$min <- as.numeric(data$min)
data$max <- as.numeric(data$max)
ggplot(data, aes(sp))+
geom_linerange(aes(ymin=min,ymax=max),linetype=1,color="blue")+
geom_point(aes(y=min),size=3,color="red")+
geom_point(aes(y=max),size=3,color="red")+
theme_bw()+coord_flip()+
ylab("min-max")+xlab("species")
}#'LOAD data from csv file extract form XPER
cecropia_min_max <- read.csv("min_max.csv", header = T)
datatable(cecropia_min_max, class = "compact")Remove “[" and “]” in interval
cecropia_min_max <- cecropia_min_max %>%
lapply(function(x) {gsub("\\[", "", x)})%>%
lapply(function(x) {gsub("\\]", "", x)})%>%
as.data.frame()Sort level of column sp to plot after
cecropia_min_max$sp <-factor(cecropia_min_max$sp,levels(cecropia_min_max$sp)[c(61:1)])
#View
datatable(cecropia_min_max, class = "compact")#'cut all column in data by group = 3
to.class <- b.interval.to.class.all(cecropia_min_max, group = 3)
to.class[] <- lapply(to.class, function(x) {gsub(",", "-", x) })
#we have result
datatable(to.class, class = "compact")We have:
#'SUBSET to 4 dataframe for 4 characters interval to plot min-max after
segment <- cecropia_min_max %>% select(sp, segment) %>%
separate(segment, into = c("min", "max"), sep = ";")
#View
datatable(segment, class = "compact")#'the same for the remaining characters
vein <- cecropia_min_max %>% select(sp, vein) %>%
separate(vein, into = c("min", "max"), sep = ";")
stamin_spike <- cecropia_min_max %>% select(sp, stamin_spike) %>%
separate(stamin_spike, into = c("min", "max"), sep = ";")
pistil_spike <- cecropia_min_max %>% select(sp, pistil_spike) %>%
separate(pistil_spike, into = c("min", "max"), sep = ";")Use function b.plot_min_max
#SEGMENT
b.plot_min_max(segment) + geom_hline(yintercept = c(10,12))#VEIN
b.plot_min_max(vein) + geom_hline(yintercept = c(16,27))#STAMIN_SPIKE
b.plot_min_max(stamin_spike) + geom_hline(yintercept = c(16,34))#PISTIL_SPIKE
b.plot_min_max(pistil_spike) + geom_hline(yintercept = c(5,8))segment_indi<- b.convert.sp.to.specimens(cecropia_min_max, "segment")
table(cut2(segment_indi$segment,g=4))##
## [ 5, 9) [ 9,11) [11,13) [13,24]
## 85 81 50 58
b.plot_min_max(segment) + geom_hline(yintercept = c(9,11,13))nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(2,4))
par(mar=c(4.5, 4.5, 1.1, 2.1))
boxplot(segment_indi$segment,
horizontal=TRUE, outline=TRUE, frame=F, col = "#FFA500",cex.axis=1.5)
d <- density(segment_indi$segment)
plot(d, main="")
polygon(d, col="#FFA500", border = "#FFA500")nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(2,4))
par(mar=c(4.5, 4.5, 1.1, 2.1))
boxplot(segment_indi$segment,
horizontal=TRUE, outline=TRUE, frame=F, col = "#FFA500",cex.axis=1.5)
hist(segment_indi$segment, freq = F, col="#FFA500")
lines(density(segment_indi$segment), col="red", lwd=3)ggplot(segment_indi, aes(x=segment))+
geom_density(color="darkblue", fill="lightblue")