Load package for this code

library(stringr) #STring analysis
library(dplyr)  #DATAFRAME analysis
library(cluster) #cluster analysis
library(dendextend) #plot tree

FUNCTION DEFINITION

## function for replace all pattern in dataframe. need for replace "?" and
# "not applicable" to "NA" in dataframe
b.replace.dataframe=function(dataframe,pattern,replacement){
    row_name=rownames(dataframe)
    a=sapply(dataframe, function(x)str_replace_all(x, pattern,replacement))
    a=as.data.frame(a)
    rownames(a)=row_name
    return(a)
}

### This function for plot tree hierachical with package (dendextend)
b.plot.cluster=function(data.hc,k=3,title="Hierarchical Clustering Plot"){
    dend =as.dendrogram(data.hc)
    dend %>% set("branches_k_color", k = k) %>%
        set("branches_lwd", 1:1)%>%
        color_labels(k)%>%
        set("leaves_pch", 19)%>%
        set("leaves_cex", 1)%>%
        set("leaves_col","blue")%>%
        plot(main =title, horiz=T)
}
## function cal percent inertia of axes for function cmdscale
## par. cmd is result of function cmdscale(eig=T)
b.percent.inertia=function(cmd){
    axes.total=cmd$eig
    inertia = sum(cmd$eig[cmd$eig> 0]) 
    percents = round(as.numeric(100 * axes.total/inertia), digits = 2)
    percents
}
### this function for plot Principal Coordinate Analysis (PCoA)


b.plot.pcoa=function(cmd,x=1,y=2,title="Principal Coordinate Analysis (PCoA)"){
    percent=b.percent.inertia(cmd)
    cmd=cmd$points
    plot(cmd[,x], cmd[,y], main=title,pch=19, col="deepskyblue2",
         xlab = paste("Axe", x,"(",percent[x],"% of inertia)"), 
         ylab = paste("Axe",y,"(",percent[y],"% of inertia)"))
    textxy(cmd[,x], cmd[,y], row.names(cmd),cex=1, offset = 0.5)
}

DATA wrangling

Load data

cecropia=read.table("cecropia.csv", header = TRUE, row.names = 1, sep=",")
COLNAME=read.table("COL_NAMES.csv", header = TRUE, sep=",")

Rename columns and row names

#Rename_row_name sp1->sp61
row_name=rownames(cecropia)
rownames(cecropia)=paste0('sp',c(1:61))
#Rename_col_name to C1->C96
col_name=colnames(cecropia)
colnames(cecropia)=paste0("C",seq(1:length(col_name)))

replace “?”, “not applicable” to “NA”

cecropia=b.replace.dataframe(cecropia,"not applicable","NA")
cecropia=b.replace.dataframe(cecropia,"\\?","NA")

GET SUBDATASET

#SELECT Column  have not missed_value and REMOVE column C58 LOCATION
df_col_need=filter(COLNAME, Missed_value!=1 & TYPE!=3)
list_col_need=paste(df_col_need$CODE)
#list columm vegetative
df_col_need_veg=filter(df_col_need, TYPE==1)
list_col_need_veg=paste(df_col_need_veg$CODE)
#list columm reproductive
df_col_need_rep=filter(df_col_need, TYPE==2)
list_col_need_rep=paste(df_col_need_rep$CODE)

# GET DATASET FOR ANALYSIS
#1. full collumn (dont have location, and missed_value)
cecropia_full=select(cecropia,one_of(list_col_need))
#2. VEGETATIVE collumn
cecropia_veg=select(cecropia_full,one_of(list_col_need_veg))
#2. REPODUCTIVE collumn
cecropia_rep=select(cecropia_full,one_of(list_col_need_rep))

ANALYSIS DATA

Fllow method analysis for qualitative variable in paper Pierre et al. 2014 “Multivariate morphometric analysis and species delimitation in the endemic New Caledonian genus Storthocalyx (Sapindaceae)” Using R with package cluster, metrice gower, method ward

# need load library(cluster)
# Dissimilarity Matrix Calculation with funtion daisy(), metrice="gower"
ce.full.dmc = daisy(cecropia_full, metric = "gower")
ce.veg.dmc = daisy(cecropia_veg, metric = "gower")
ce.rep.dmc = daisy(cecropia_rep, metric = "gower")
 

# Hierarchical Clustering with function agnes(), method="ward"
ce.full.hc = agnes(ce.full.dmc, method = 'ward')
ce.veg.hc = agnes(ce.veg.dmc, method = 'ward')
ce.rep.hc = agnes(ce.rep.dmc, method = 'ward')


## Principal Coordinate Analysis (PCoA)
cmd.ce.full = cmdscale(ce.full.dmc, eig=T)
cmd.ce.veg = cmdscale(ce.veg.dmc, eig=T)
cmd.ce.rep = cmdscale(ce.rep.dmc, eig=T)

PLOT

Data Cecropia full collumn

library(calibrate) #add label to point
b.plot.pcoa(cmd.ce.full, title = "PCoA of full")

b.plot.cluster(ce.full.hc, k=7, title="HC full")
abline(v=0.97, lty=2)

Data Cecropia vegetative collumn

b.plot.pcoa(cmd.ce.veg, title = "PCoA of vegetative")

b.plot.cluster(ce.veg.hc, k=8, title="HC vegetative")
abline(v=1, lty=2)

Data Cecropia reproductive collumn

b.plot.pcoa(cmd.ce.rep,title = "PCoA of repoductive")

b.plot.cluster(ce.rep.hc, k=4, title="HC reproductive")
abline(v=1.15, lty=2)