# Import the HBAT cluster-analysis data set (from the Hair et al. textbook).
# The working directory is only switched when the original author's folder
# exists, so the script no longer stops with an error on other machines.
data_dir <- "E:/2 presentation for class/R/data set"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# file.choose() opens an interactive file picker like choose.files() did, but
# it is available on every platform and always returns exactly one path.
clust1 <- read.csv(file.choose())
# Check the structure of the data set
str(clust1)
attributes(clust1)
# There are 14 variables: x6 to x18 plus one ID variable
summary(clust1)
# fivenum() expects a single numeric vector, so it is applied column by
# column; each column of the result holds min, lower hinge, median, upper
# hinge and max for one numeric variable.
vapply(Filter(is.numeric, clust1), fivenum, numeric(5))
# Handle missing values: listwise deletion of incomplete rows
clust1 <- na.omit(clust1)
# Standardise the analysis variables x6..x18 (centre to mean 0, scale to sd 1).
# scale() applied to the whole data frame returns a matrix, which makes every
# later clust1$x.. access fail and also rescales the ID column. Each variable
# is therefore scaled on its own and written back, so clust1 stays a data
# frame and the ID column is left untouched.
scale_vars <- paste0("x", 6:18)
clust1[scale_vars] <- lapply(
  clust1[scale_vars],
  # as.numeric() drops the one-column matrix shape and the scaling attributes
  function(v) as.numeric(scale(v, center = TRUE, scale = TRUE))
)
#assumptions of cluster analysis
#removing the outliers
#------------------------------------------#
# plot the values on a graph using uniplot
library(mvoutlier)
library(outliers)
library(ggplot2)
# Split the columns into two panels for the univariate outlier plots:
# clust3 drops columns 7-14, clust4 drops columns 1-6.
clust3 <- clust1[, -(7:14)]
clust4 <- clust1[, -seq_len(6)]
# Inspect both panels, then draw the mvoutlier univariate plots
str(clust3)
str(clust4)
uni.plot(clust3)
uni.plot(clust4)
#-------------------------------------------------------------------
# Outliers are detected with the IQR rule and then removed from the variables.
# There seem to be some outliers in x7, x12, x16 and x18, which should be checked and removed.
# Outliers and multicollinearity
#---------------------------------------------------------#
# Univariate outliers: one boxplot per variable, IQR fences where needed
boxplot(clust1$x6)
#---------------------------------
boxplot(clust1$x7) # outliers visible
boxplot(log(clust1$x7)) # log transform tried as a way to pull the outliers in
# Tukey fences: Q3 + 1.5 * IQR (upper) and Q1 - 1.5 * IQR (lower)
benchU7 <- quantile(clust1$x7, probs = 0.75) + IQR(clust1$x7) * 1.5
benchL7 <- quantile(clust1$x7, probs = 0.25) - IQR(clust1$x7) * 1.5
# Observations lying beyond the upper / lower fence
x7ou <- clust1$x7[clust1$x7 > benchU7]
x7ol <- clust1$x7[clust1$x7 < benchL7]
# Keep only the rows whose x7 lies inside both fences (all 14 columns kept)
clust1o <- subset(clust1, x7 >= benchL7 & x7 <= benchU7, select = 1:14)
# The fences are always computed from the initial data set clust1; the filtered copy clust1o is then updated as each variable's outliers are removed.
#-----------------------------------
# Boxplots for x8..x11 (drawn one after the other)
invisible(lapply(clust1[c("x8", "x9", "x10", "x11")], boxplot))
#---------------------------------------#
boxplot(clust1$x12) # outliers visible
boxplot(log(clust1$x12))
# log transform tried as a way to pull the outliers in
# Tukey fences computed from the initial data set clust1
benchU12 <- quantile(clust1$x12, probs = 0.75) + IQR(clust1$x12) * 1.5
benchL12 <- quantile(clust1$x12, probs = 0.25) - IQR(clust1$x12) * 1.5
# Observations lying beyond the upper / lower fence
x12ou <- clust1$x12[clust1$x12 > benchU12]
x12ol <- clust1$x12[clust1$x12 < benchL12]
# Drop the flagged rows from the already-filtered data set clust1o
clust1o <- subset(clust1o, x12 >= benchL12 & x12 <= benchU12, select = 1:14)
#-----------------------------------
# Boxplots for x13..x15 (drawn one after the other)
invisible(lapply(clust1[c("x13", "x14", "x15")], boxplot))
#---------------------------
boxplot(clust1$x16) # outliers visible
boxplot(log(clust1$x16))
# log transform tried as a way to pull the outliers in
# Tukey fences computed from the initial data set clust1
benchU16 <- quantile(clust1$x16, probs = 0.75) + IQR(clust1$x16) * 1.5
benchL16 <- quantile(clust1$x16, probs = 0.25) - IQR(clust1$x16) * 1.5
# Observations lying beyond the upper / lower fence
x16ou <- clust1$x16[clust1$x16 > benchU16]
x16ol <- clust1$x16[clust1$x16 < benchL16]
# Drop the flagged rows from the already-filtered data set clust1o
clust1o <- subset(clust1o, x16 >= benchL16 & x16 <= benchU16, select = 1:14)
#-----------------------------------
boxplot(clust1$x17)
#-----------------------------------------
boxplot(clust1$x18) # outliers visible
boxplot(log(clust1$x18))
# log transform tried as a way to pull the outliers in; it does not work here
# Tukey fences computed from the initial data set clust1
benchU18 <- quantile(clust1$x18, probs = 0.75) + IQR(clust1$x18) * 1.5
benchL18 <- quantile(clust1$x18, probs = 0.25) - IQR(clust1$x18) * 1.5
# Observations lying beyond the upper / lower fence
x18ou <- clust1$x18[clust1$x18 > benchU18]
x18ol <- clust1$x18[clust1$x18 < benchL18]
# Drop the flagged rows from the already-filtered data set clust1o
clust1o <- subset(clust1o, x18 >= benchL18 & x18 <= benchU18, select = 1:14)
#------------------------------------------#
#-----------------------------------------
#--------------------------------------------------
# Re-draw the univariate outlier plots on the filtered data set clust1o
# to check the effect of the removals (same two column panels as before)
clust3 <- clust1o[, -(7:14)]
clust4 <- clust1o[, -seq_len(6)]
str(clust3)
str(clust4)
uni.plot(clust3)
uni.plot(clust4)
#-------------------------------------
#univariate normality
#Multivariate normality.
library(MVN)
library(mvtnorm)
library(mvoutlier)
library(mvnormtest)
library(nortest)
library(moments)
library(fBasics)
library(pracma)
# Univariate normality of x6: Q-Q plot, boxplot, descriptive statistics,
# skewness, kurtosis and the Shapiro-Wilk test, followed by the standard
# errors of skewness and kurtosis. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x6)
qqline(clust1o$x6)
boxplot(clust1o$x6)
basicStats(clust1o$x6)
skewness(clust1o$x6)
kurtosis(clust1o$x6)
shapiro.test(clust1o$x6)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
# Only the length of the argument is used.
std_error_skew <- function(x6) {
  n <- length(x6)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x6)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x6) {
  n <- length(x6)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x6)
# Standardised values: each statistic divided by its standard error.
# NOTE(review): the kurtosis ratio is only meaningful if kurtosis() returns
# excess kurtosis; more than one loaded package provides kurtosis(), so
# confirm which one is active.
std_norm_skew <- skewness(clust1o$x6) / std_error_skew(clust1o$x6)
std_norm_kurt <- kurtosis(clust1o$x6) / std_error_kurt(clust1o$x6)
std_norm_skew
std_norm_kurt
# A log transform is tried to reduce the kurtosis.
# NOTE(review): log() gives NaN for values <= 0 (e.g. z-scores); confirm the
# column holds strictly positive values at this point.
qqnorm(log(clust1o$x6))
qqline(log(clust1o$x6))
skewness(log(clust1o$x6))
kurtosis(log(clust1o$x6))
shapiro.test(log(clust1o$x6))
#----------------------------------------------------------------------
# Author's conclusion: x6 is not normally distributed; there is a kurtosis
# problem, which can be reduced by taking the log of x6.
#----------------------------------x7--------------------------------------
# Univariate normality of x7. The column is read directly from clust1o; the
# original attach() here was never detached and left a copy of the data on
# the search path.
qqnorm(clust1o$x7)
qqline(clust1o$x7)
boxplot(clust1o$x7)
basicStats(clust1o$x7)
skewness(clust1o$x7)
kurtosis(clust1o$x7)
shapiro.test(clust1o$x7)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x7) {
  n <- length(x7)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x7)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x7) {
  n <- length(x7)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x7)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x7) / std_error_skew(clust1o$x7)
std_norm_kurt <- kurtosis(clust1o$x7) / std_error_kurt(clust1o$x7)
std_norm_skew
std_norm_kurt
# Author's conclusion: x7 is normally distributed
#----------------------------------------------------------------------------#
#----------------------------------x8--------------------------------------
# Univariate normality of x8. The column is read directly from clust1o; the
# original attach() here was never detached and left a copy of the data on
# the search path.
qqnorm(clust1o$x8)
qqline(clust1o$x8)
boxplot(clust1o$x8)
basicStats(clust1o$x8)
skewness(clust1o$x8)
kurtosis(clust1o$x8)
shapiro.test(clust1o$x8)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x8) {
  n <- length(x8)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x8)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x8) {
  n <- length(x8)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x8)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x8) / std_error_skew(clust1o$x8)
std_norm_kurt <- kurtosis(clust1o$x8) / std_error_kurt(clust1o$x8)
std_norm_skew
std_norm_kurt
# Author's conclusion: x8 is normally distributed
#----------------------------------------------------------------------------#
#----------------------------------x9--------------------------------------
# Univariate normality of x9. The column is read directly from clust1o; the
# original attach() here was never detached and left a copy of the data on
# the search path.
qqnorm(clust1o$x9)
qqline(clust1o$x9)
boxplot(clust1o$x9)
basicStats(clust1o$x9)
skewness(clust1o$x9)
kurtosis(clust1o$x9)
shapiro.test(clust1o$x9)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x9) {
  n <- length(x9)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x9)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x9) {
  n <- length(x9)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x9)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x9) / std_error_skew(clust1o$x9)
std_norm_kurt <- kurtosis(clust1o$x9) / std_error_kurt(clust1o$x9)
std_norm_skew
std_norm_kurt
# Author's conclusion: x9 is normally distributed
#-------------------------------------------------------------------#
#----------------------------------x10--------------------------------------
# Univariate normality of x10. The column is read directly from clust1o; the
# original attach() here was never detached and left a copy of the data on
# the search path.
qqnorm(clust1o$x10)
qqline(clust1o$x10)
boxplot(clust1o$x10)
basicStats(clust1o$x10)
skewness(clust1o$x10)
kurtosis(clust1o$x10)
shapiro.test(clust1o$x10)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x10) {
  n <- length(x10)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x10)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x10) {
  n <- length(x10)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x10)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x10) / std_error_skew(clust1o$x10)
std_norm_kurt <- kurtosis(clust1o$x10) / std_error_kurt(clust1o$x10)
std_norm_skew
std_norm_kurt
# Author's conclusion: x10 is normally distributed
#-------------------------------------------------------------------#
#----------------------------------x11--------------------------------------
# Univariate normality of x11. The column is read directly from clust1o; the
# original attach() here was never detached and left a copy of the data on
# the search path.
qqnorm(clust1o$x11)
qqline(clust1o$x11)
boxplot(clust1o$x11)
basicStats(clust1o$x11)
skewness(clust1o$x11)
kurtosis(clust1o$x11)
shapiro.test(clust1o$x11)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
# The parameter keeps its original name (x10); only its length is used and
# the vector is passed positionally.
std_error_skew <- function(x10) {
  n <- length(x10)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
# Fixed: the standard errors and z-values below were computed from x10 in
# this section; they now use x11.
std_error_skew(clust1o$x11)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x10) {
  n <- length(x10)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x11)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x11) / std_error_skew(clust1o$x11)
std_norm_kurt <- kurtosis(clust1o$x11) / std_error_kurt(clust1o$x11)
std_norm_skew
std_norm_kurt
# Author's conclusion: x11 is normally distributed
# NOTE(review): that conclusion was drawn while the z-values were computed
# from x10; re-check it against the corrected output.
#-------------------------------------------------------------------#
#----------------------------------x12--------------------------------------
# Univariate normality of x12. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x12)
qqline(clust1o$x12)
boxplot(clust1o$x12)
basicStats(clust1o$x12)
skewness(clust1o$x12)
kurtosis(clust1o$x12)
shapiro.test(clust1o$x12)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x12) {
  n <- length(x12)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x12)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x12) {
  n <- length(x12)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x12)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x12) / std_error_skew(clust1o$x12)
std_norm_kurt <- kurtosis(clust1o$x12) / std_error_kurt(clust1o$x12)
std_norm_skew
std_norm_kurt
# Author's conclusion: x12 is normally distributed
#-------------------------------------------------------------------#
#----------------------------------x13--------------------------------------
# Univariate normality of x13, on the raw column and on its log transform.
# The column is read directly from clust1o; the original re-attached the
# data for the log checks and never detached it.
qqnorm(clust1o$x13)
qqline(clust1o$x13)
boxplot(clust1o$x13)
basicStats(clust1o$x13)
skewness(clust1o$x13)
kurtosis(clust1o$x13)
shapiro.test(clust1o$x13)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x13) {
  n <- length(x13)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x13)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x13) {
  n <- length(x13)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x13)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x13) / std_error_skew(clust1o$x13)
std_norm_kurt <- kurtosis(clust1o$x13) / std_error_kurt(clust1o$x13)
std_norm_skew
std_norm_kurt
# Author's conclusion: x13 is not normally distributed, so a log
# transformation is tried to reduce the kurtosis.
# NOTE(review): log() gives NaN for values <= 0 (e.g. z-scores); confirm the
# column holds strictly positive values at this point.
qqnorm(log(clust1o$x13))
qqline(log(clust1o$x13))
boxplot(log(clust1o$x13))
basicStats(log(clust1o$x13))
skewness(log(clust1o$x13))
kurtosis(log(clust1o$x13))
shapiro.test(log(clust1o$x13))
# Author's conclusion: kurtosis is reduced, but x13 is still not normal
#-------------------------------------------------------------------#
#----------------------------------x14--------------------------------------
# Univariate normality of x14. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x14)
qqline(clust1o$x14)
boxplot(clust1o$x14)
basicStats(clust1o$x14)
skewness(clust1o$x14)
kurtosis(clust1o$x14)
shapiro.test(clust1o$x14)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x14) {
  n <- length(x14)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x14)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x14) {
  n <- length(x14)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x14)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x14) / std_error_skew(clust1o$x14)
std_norm_kurt <- kurtosis(clust1o$x14) / std_error_kurt(clust1o$x14)
std_norm_skew
std_norm_kurt
#---------------------------------------------------------#
#----------------------------------x15--------------------------------------
# Univariate normality of x15. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x15)
qqline(clust1o$x15)
boxplot(clust1o$x15)
basicStats(clust1o$x15)
skewness(clust1o$x15)
kurtosis(clust1o$x15)
shapiro.test(clust1o$x15)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x15) {
  n <- length(x15)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x15)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x15) {
  n <- length(x15)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x15)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x15) / std_error_skew(clust1o$x15)
std_norm_kurt <- kurtosis(clust1o$x15) / std_error_kurt(clust1o$x15)
std_norm_skew
std_norm_kurt
# Author's conclusion: x15 is normally distributed
#-------------------------------------------------------#
#----------------------------------x16--------------------------------------
# Univariate normality of x16. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x16)
qqline(clust1o$x16)
boxplot(clust1o$x16)
basicStats(clust1o$x16)
skewness(clust1o$x16)
kurtosis(clust1o$x16)
shapiro.test(clust1o$x16)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x16) {
  n <- length(x16)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x16)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
# Fixed: the body used to ignore its argument and read a variable named x16
# from the search path; it now uses the vector it is given. The parameter
# keeps its original name (x15); callers pass the vector positionally.
std_error_kurt <- function(x15) {
  n <- length(x15)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x16)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x16) / std_error_skew(clust1o$x16)
std_norm_kurt <- kurtosis(clust1o$x16) / std_error_kurt(clust1o$x16)
std_norm_skew
std_norm_kurt
# Author's conclusion: x16 is not normally distributed
#----------------------------------x17--------------------------------------
# Univariate normality of x17, on the raw column and on its log transform.
# The column is read directly from clust1o. The original log checks ran after
# detach(clust1o) and only found x17 because earlier attach() calls had been
# left on the search path.
qqnorm(clust1o$x17)
qqline(clust1o$x17)
boxplot(clust1o$x17)
basicStats(clust1o$x17)
skewness(clust1o$x17)
kurtosis(clust1o$x17)
shapiro.test(clust1o$x17)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x17) {
  n <- length(x17)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x17)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x17) {
  n <- length(x17)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x17)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x17) / std_error_skew(clust1o$x17)
std_norm_kurt <- kurtosis(clust1o$x17) / std_error_kurt(clust1o$x17)
std_norm_skew
std_norm_kurt
# Author's conclusion: x17 is not normally distributed, so the log
# transform is checked as well.
# NOTE(review): log() gives NaN for values <= 0 (e.g. z-scores); confirm the
# column holds strictly positive values at this point.
qqnorm(log(clust1o$x17))
qqline(log(clust1o$x17))
boxplot(log(clust1o$x17))
basicStats(log(clust1o$x17))
skewness(log(clust1o$x17))
kurtosis(log(clust1o$x17))
shapiro.test(log(clust1o$x17))
# Author's conclusion: taking the log reduced the skewness and kurtosis and
# brought x17 close to normal
#-----------------------------------------------------------------------------#
#----------------------------------x18--------------------------------------
# Univariate normality of x18. The column is read directly from clust1o
# instead of going through attach()/detach().
qqnorm(clust1o$x18)
qqline(clust1o$x18)
boxplot(clust1o$x18)
basicStats(clust1o$x18)
skewness(clust1o$x18)
kurtosis(clust1o$x18)
shapiro.test(clust1o$x18)
#----------------------------------
# Standard error of skewness (SES) for a sample of size n:
#   sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))
std_error_skew <- function(x18) {
  n <- length(x18)
  sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
}
std_error_skew(clust1o$x18)
# Standard error of kurtosis:
#   2 * SES * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
std_error_kurt <- function(x18) {
  n <- length(x18)
  ses <- sqrt((6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3)))
  2 * ses * sqrt((n * n - 1) / ((n - 3) * (n + 5)))
}
std_error_kurt(clust1o$x18)
# Standardised values: each statistic divided by its standard error
std_norm_skew <- skewness(clust1o$x18) / std_error_skew(clust1o$x18)
std_norm_kurt <- kurtosis(clust1o$x18) / std_error_kurt(clust1o$x18)
std_norm_skew
std_norm_kurt
# Author's conclusion: x18 is normally distributed
# It is found that x6, x13, x16 and x17 are not normally distributed, although in cluster analysis non-normality does not affect the results much.
#multicollinearity using correlation
library(ggplot2)
library(lattice)
# Multicollinearity check on the outlier-screened data. The first column is
# the ID, so it is dropped; every remaining column (through the last
# variable) enters the correlogram and the correlation / p-value matrices.
# The previous 1:13 selection kept the ID and left out the final column.
corrgram::corrgram(clust1o[, -1])
library(Hmisc)
rcorr(as.matrix(clust1o[, -1]))
# On the basis of the correlations, some pairs are correlated, such as x7-x12, x8-x14, and x9 with x16 and x18. (Since they are very few, we can check x16 and x18; there are also outliers in these variables.)
#splom(clust1o[c(1,2,3,4,5,6)])
#splom(clust1o[c(7,8,9,10,11)])
#splom(clust1o[c(12,13,14,15,16,17,18)])
# splom()
# cluster analysis Hierachical cluster analysis package hclust
library(fastcluster)
library(cluster)
library(mclust)
#calcualte the dissimilarity matrix
# distance disimmilarity matrix
#convert the file into matrix if not then also it is done
#clust1o<-as.matrix(clust1o)
#default is euclidean
#dis_m<-daisy(clust1o, metric = c("euclidean", "manhattan", "gower"))
# Convert the data frame to a numeric matrix, dropping the ID variable
# (first column).
# NOTE(review): this clusters clust1, not the outlier-screened clust1o;
# confirm which data set is intended.
clust1 <- data.matrix(clust1[, -1])
# Gower dissimilarity matrix. The ID column is already gone, so the matrix is
# passed as it is; dropping the first column a second time would silently
# discard the first analysis variable.
dis_m <- daisy(clust1, metric = "gower")
# Hierarchical clustering with complete linkage
clust_1 <- hclust(dis_m, method = "complete", members = NULL)
# Merge history, merge heights, leaf order and labels
clust_1$merge
clust_1$height
clust_1$order
clust_1$labels
# Dendrogram
plot(clust_1)
# Scree plot: merge height against merge step
yaxis <- clust_1$height
xaxis <- seq_along(clust_1$height)
qplot(xaxis, yaxis, geom = "line")
#----------------------------------
# Alternative hierarchical clustering with hclust.vector(), which takes the
# data matrix itself (single linkage, Euclidean metric here)
clust_2 <- hclust.vector(
  clust1,
  method = "single", members = NULL, metric = "euclidean", p = NULL
)
# methods: "single", "ward", "centroid" or "median"
# metrics: "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
clust_2$merge
clust_2$order
clust_2$labels
clust_2$height
# Dendrogram
plot(clust_2)
# Scree plot: merge step on the horizontal axis, merge height on the vertical
# (despite their names, yaxis1 holds the step index and xaxix1 the heights)
yaxis1 <- 1:length(clust_2$height)
xaxix1 <- clust_2$height
qplot(yaxis1, xaxix1, geom = "line")
#______________________________--------------------------------------
#------------------------------------------------------------------
# Agglomerative nesting with cluster::agnes(): average linkage on Euclidean
# distances computed from the raw (non-dissimilarity) input, no extra
# standardisation
clust1_5 <- agnes(
  clust1,
  diss = FALSE, metric = "euclidean", stand = FALSE, method = "average"
)
print(clust1_5)
# Leaf order, merge heights, agglomerative coefficient and merge history
clust1_5$order
clust1_5$height
clust1_5$ac
clust1_5$merge
# Plot the result (dendrogram)
plot(clust1_5)
#k-Mean Cluster
library(kmeans.ddR)
#------------------------------------------------------------------------------#
# k-means with 5 clusters. kmeans() starts from randomly chosen centres, so a
# fixed seed makes the run reproducible, and several random starts (nstart)
# reduce the chance of ending in a poor local optimum.
set.seed(123)
clust1_km <- kmeans(clust1, centers = 5, nstart = 25)
# Cluster membership, cluster centres and cluster sizes
clust1_km$cluster
clust1_km$centers
clust1_km$size
# Total, within-cluster and between-cluster sums of squares
clust1_km$totss
clust1_km$withinss
clust1_km$betweenss
#------------------------------------------------------------------------------#
# dkmeans(X, centers, iter.max = 10, nstart = 1, sampling_threshold = 1e+06, trace = FALSE, na_action = c("exclude","fail"), completeModel = FALSE)
#convert the data.matrix into darray
#X <- as.darray(data.matrix(iris2))
library(kmeans.ddR)
library(ddR)
# Distributed k-means: the data are first converted to a darray, which
# replaces clust1, and then clustered into 5 groups with dkmeans()
clust1 <- as.darray(data.matrix(clust1))
clust1_km2 <- dkmeans(
  clust1, 5,
  iter.max = 10, nstart = 1, sampling_threshold = 1e+06, trace = FALSE
)
# Show the fitted model (auto-print, then an explicit print)
clust1_km2
print(clust1_km2)
# Cluster centres and sizes
clust1_km2$centers
clust1_km2$size
# Sums of squares and number of iterations
clust1_km2$totss
clust1_km2$withinss
clust1_km2$betweenss
clust1_km2$iter