#install.packages("plyr")
library(plyr)
# First coordinate: 50 draws from the standard uniform distribution.
# The printed values below were recorded from one run; no seed is set,
# so they change on every run.
x <- runif(n = 50)
x
##  [1] 0.23224092 0.82896282 0.96868378 0.51702072 0.12678817 0.21585107
##  [7] 0.45153020 0.90498076 0.02772658 0.74339313 0.29006730 0.90794485
## [13] 0.48535428 0.55149362 0.75518320 0.84138066 0.17597947 0.85146251
## [19] 0.22629888 0.13597440 0.16221694 0.67780688 0.17046110 0.74925605
## [25] 0.20919972 0.05998034 0.53749238 0.03094384 0.72886750 0.37087390
## [31] 0.61466659 0.13522688 0.91799948 0.52104240 0.35598070 0.30301406
## [37] 0.22781707 0.68670288 0.49663559 0.67645157 0.14987676 0.27316356
## [43] 0.42939979 0.80448297 0.36797164 0.13210718 0.01464530 0.06255951
## [49] 0.11482487 0.05699042
# Second coordinate: another 50 independent uniform draws.
y <- runif(n = 50)
y
##  [1] 0.75069832 0.69168215 0.69193974 0.46237069 0.35556811 0.27175687
##  [7] 0.66913040 0.63245801 0.87826657 0.67913815 0.50202132 0.98848389
## [13] 0.38263397 0.36652310 0.81918992 0.41650463 0.28704383 0.74862058
## [19] 0.99671565 0.41330482 0.92558150 0.06073051 0.07024213 0.32452020
## [25] 0.19596377 0.98341438 0.64636562 0.67565875 0.80305686 0.81034809
## [31] 0.37971065 0.13625850 0.05861809 0.64242001 0.67543551 0.27090170
## [37] 0.94139608 0.89671464 0.06628255 0.88710197 0.63878606 0.67690682
## [43] 0.09137363 0.91929956 0.67592389 0.46815074 0.03737657 0.01634542
## [49] 0.97282896 0.77382226
# Bind both vectors column-wise into a 50 x 2 matrix with columns "x" and "y".
data <- cbind(x = x, y = y)
dim(data)
## [1] 50  2
# Scatter plot of the sample on a fresh graphics device.
# dev.new() is used rather than windows() because windows() only exists in
# Windows builds of R; dev.new() opens the platform's default device.
dev.new()
plot(data)

# Redraw the same axes with no points, then write a label at each observation.
# The second positional argument of text() is `y`, not `labels`, so the labels
# are passed by name. Row names are used when present; `data` built with
# cbind() from plain vectors has none, so the row index is used instead.
plot(data, type = "n")
point_labels <- rownames(data)
if (is.null(point_labels)) {
  point_labels <- seq_len(nrow(data))
}
text(data, labels = point_labels)

# Pairwise Euclidean distances between observations (rows of `data`).
d <- dist(data, method = "euclidean")
head(d)
## [1] 0.5996332 0.7387832 0.4052559 0.4089599 0.4792218 0.2339682
# Complete-linkage hierarchical clustering, drawn as a dendrogram
# on its own device.
fit <- hclust(d, method = "complete")
dev.new()
plot(fit)

### Example with more samples: the dendrogram becomes cluttered

# 300 uniformly distributed points in the unit square.
w <- runif(300)
z <- runif(300)
data1 <- cbind(w, z)
# The distances must come from the 300-point matrix `data1`; using the
# 50-point `data` here would just redraw the earlier, uncluttered dendrogram.
d1 <- dist(data1, method = "euclidean")
fit <- hclust(d1, method = "complete")
# dev.new() instead of windows(): windows() only exists in Windows builds of R.
dev.new()
plot(fit)

###############

# K-means clustering

# Partition the observations in `data` into 4 groups with k-means and show
# the cluster index assigned to each row (output recorded from one run).
km <- kmeans(x = data, centers = 4)
km$cluster
##  [1] 3 2 2 1 4 4 3 2 3 2 4 2 1 1 2 1 4 2 3 4 3 1 4 1 4 3 2 3 2 3 1 4 1 2 3
## [36] 4 3 2 1 2 3 3 1 2 3 4 4 4 3 3
# One-column data frame of the assignments; its column is named after the
# expression, i.e. `km$cluster`.
cluster <- as.data.frame(km$cluster)
# Put the assignments in front of the coordinates. check.names = FALSE keeps
# the `km$cluster` column name exactly as cbind() on a data frame would.
final <- data.frame(cluster, data, check.names = FALSE)

# Animation. Note that the diagram will differ every time the whole program is run, because we are using random samples.

#install.packages("animation")
library(animation)
## Warning: package 'animation' was built under R version 3.6.1
# Animate the k-means iterations for 4 centres on a fresh graphics device.
# dev.new() is used rather than windows() because windows() only exists in
# Windows builds of R. The result replaces the earlier `km` object.
dev.new()
km <- kmeans.ani(data, centers = 4)

# Final cluster centres and per-row assignments (output recorded from one run).
km$centers
##              x         y
## [1,] 0.4248242 0.5456327
## [2,] 0.1508900 0.8497742
## [3,] 0.8118394 0.6826663
## [4,] 0.2425801 0.1748576
km$cluster
##  [1] 2 3 3 1 4 4 1 3 2 3 1 3 1 1 3 3 4 3 2 4 2 4 4 3 4 2 1 2 3 2 1 4 3 1 1
## [36] 4 2 3 4 3 2 1 4 3 1 1 4 4 2 2

# Use the elbow curve and the rule of thumb k ~ sqrt(n/2) to decide the value of k.

# Elbow curve for choosing the number of clusters.
# wss[k] holds the within-group sum of squares for k clusters, k = 1..8.
wss <- numeric(8)
# k = 1: a single cluster, so this is the total sum of squares, computed as
# (n - 1) times the sum of the column variances.
wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
# k = 2..8: sum the per-cluster within sums of squares of a k-means fit.
for (i in 2:8) {
  wss[i] <- sum(kmeans(data, centers = i)$withinss)
}
plot(seq_along(wss), wss, type = "b",
     xlab = "Number of Clusters", ylab = "Within groups sum")