Non-Hierarchical Clustering

# Six records: one name vector plus seven numeric score vectors
# (presumably exam marks per subject -- the data source is not shown here).
Nama <- c("Andi", "Benny", "Budi", "Ika", "Maya", "Ana")
Mat <- c(8.1, 5.6, 5.2, 6.7, 8.2, 5.7)
Fis <- c(8.3, 6.3, 5.8, 6.8, 8.2, 6.4)
Bio <- c(7.6, 6.1, 5.7, 5.6, 7.4, 5.9)
Sej <- c(6.2, 7.3, 7, 7.4, 6.4, 7.1)
Kew <- c(5.8, 7.4, 6.8, 5.3, 5.7, 7.2)
Sos <- c(5.4, 7.6, 7.2, 5.4, 5.5, 7.3)
Sen <- c(6.0, 6.0, 5.7, 7.9, 6.1, 5.8)

# Bind the vectors column-wise; the first column (Nama) is the only
# non-numeric one, which is why later code drops it with [, -1].
Ujian <- data.frame(
  Nama = Nama,
  Mat = Mat,
  Fis = Fis,
  Bio = Bio,
  Sej = Sej,
  Kew = Kew,
  Sos = Sos,
  Sen = Sen
)

# Scatter plot of two of the score vectors against each other.
plot(x = Mat, y = Sos)

# One box per numeric column; column 1 (Nama) is dropped.
boxplot(Ujian[, -1])

# Pairwise distances between the rows (dist() defaults to Euclidean),
# then shown as a full square matrix.
jarak <- dist(Ujian[, -1])
as.matrix(jarak)

##           1         2         3        4         5         6
## 1 0.0000000 4.5945620 4.8207883 3.675595 0.3605551 4.3220366
## 2 4.5945620 0.0000000 1.1269428 3.818377 4.4922155 0.5196152
## 3 4.8207883 1.1269428 0.0000000 3.708099 4.7191101 0.9165151
## 4 3.6755952 3.8183766 3.7080992 0.000000 3.4438351 3.6013886
## 5 0.3605551 4.4922155 4.7191101 3.443835 0.0000000 4.2201896
## 6 4.3220366 0.5196152 0.9165151 3.601389 4.2201896 0.0000000

# Agglomerative clustering of the distance object using average linkage.
klaster <- hclust(jarak, method = "average")
library(factoextra)

## Warning: package 'factoextra' was built under R version 4.2.3

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Draw the resulting tree as a dendrogram.
fviz_dend(klaster)

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.

# k-means with three clusters on the numeric score columns.
# kmeans() draws its starting centres at random, so the seed is fixed (the
# same way the running-time k-means further down does) to make the result
# reproducible, and several random starts are tried so that one unlucky
# initialisation cannot leave the fit in a poor local optimum.
set.seed(10)
clus_kmeans <- kmeans(Ujian[, -1], centers = 3, nstart = 25)

# Cluster label assigned to each row.
clus_kmeans$cluster

# NOTE: the outputs recorded below come from an earlier, unseeded run.
# Cluster numbers are arbitrary labels, so the numbering shown by a fresh
# run may be permuted relative to these listings.
## [1] 3 1 1 2 3 1

# Per-cluster mean of every variable.
clus_kmeans$centers

##    Mat      Fis Bio      Sej      Kew      Sos      Sen
## 1 5.50 6.166667 5.9 7.133333 7.133333 7.366667 5.833333
## 2 6.70 6.800000 5.6 7.400000 5.300000 5.400000 7.900000
## 3 8.15 8.250000 7.5 6.300000 5.750000 5.450000 6.050000

# Plot the partition; the original data must be supplied for kmeans objects.
fviz_cluster(clus_kmeans, data = Ujian[, -1])

library(cluster)
# Partitioning around medoids with three clusters on the numeric columns.
clust_kmedoid <- pam(Ujian[, -1], k = 3)

# Cluster label assigned to each row.
clust_kmedoid$clustering

## [1] 1 2 2 3 1 2

# The medoids: actual data rows chosen as cluster representatives.
clust_kmedoid$medoids

##      Mat Fis Bio Sej Kew Sos Sen
## [1,] 8.2 8.2 7.4 6.4 5.7 5.5 6.1
## [2,] 5.7 6.4 5.9 7.1 7.2 7.3 5.8
## [3,] 6.7 6.8 5.6 7.4 5.3 5.4 7.9

# Plot the k-medoids partition.
fviz_cluster(clust_kmedoid, data = Ujian[, -1])

#clusplot(clust_kmedoid)

# Load the mall-customer table from a CSV file on the local disk.
data.mall <- read.csv(
  file = "D:/MAGISTER (S2)/ASISTENSI/Pelatihan/Pelatihan Kominfo Digitalent 2023/P3/Mall_Customers.csv"
)

# Number of rows and columns.
dim(data.mall)

## [1] 200   5

# Column types together with a preview of the first values.
str(data.mall)

## 'data.frame':    200 obs. of  5 variables:
##  $ CustomerID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Genre         : chr  "Male" "Male" "Female" "Female" ...
##  $ Age           : int  19 21 20 23 31 22 35 23 64 30 ...
##  $ Annual.Income : int  15 15 16 16 17 17 18 18 19 19 ...
##  $ Spending.Score: int  39 81 6 77 40 76 6 94 3 72 ...

# Keep columns 3 to 5 only (Age, Annual.Income and Spending.Score according
# to the str() listing) and compare their spread.
data.mall2 <- data.mall[, 3:5]
boxplot(data.mall2)

# Standardise every column with scale()'s defaults (centre, then scale).
data_scale <- scale(data.mall2)

# Distances on the standardised data, hierarchical clustering with hclust()'s
# default linkage, and the dendrogram of the resulting tree.
jrk <- dist(data_scale)
aggl <- hclust(jrk)
fviz_dend(aggl)

# k-means through factoextra::eclust() on standardised data, without a fixed
# k and without drawing the cluster plot; its gap_stat component is plotted
# further down.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change these arguments.
gerombol <- eclust(
  data.mall2,
  FUNcluster = "kmeans",
  stand = TRUE,
  graph = FALSE
)

# Same procedure with the number of clusters fixed at four (the plot is
# drawn, since graph is left at its default).
gerombol2 <- eclust(
  data.mall2,
  FUNcluster = "kmeans",
  k = 4,
  stand = TRUE
)

# Cluster centres; the recorded values below are on the standardised scale.
gerombol2$centers

##           Age Annual.Income Spending.Score
## 1  0.03711223     0.9876366     -1.1857814
## 2  1.08344244    -0.4893373     -0.3961802
## 3 -0.96008279    -0.7827991      0.3910484
## 4 -0.42773261     0.9724070      1.2130414

# Gap-statistic plot from the run where k was not fixed.
fviz_gap_stat(gerombol$gap_stat)

# Per-cluster means on the original (unstandardised) scale.
aggregate(data.mall2, by = list(cluster = gerombol2$cluster), FUN = mean)

##   cluster      Age Annual.Income Spending.Score
## 1       1 39.36842      86.50000       19.57895
## 2       2 53.98462      47.70769       39.96923
## 3       3 25.43860      40.00000       60.29825
## 4       4 32.87500      86.10000       81.52500

# k-means with six clusters on the standardised mall data.
# The seed is set explicitly (as the running-time k-means further down does)
# so the fit no longer depends on whatever state earlier code left the random
# number generator in; several random starts guard against a poor local
# optimum from a single unlucky initialisation.
set.seed(10)
mall_kmeans <- kmeans(data_scale, centers = 6, nstart = 25)

# Cluster centres on the standardised scale.
mall_kmeans$centers

# NOTE: the output recorded below comes from an earlier run without the
# explicit seed; cluster numbering (and possibly the partition itself) may
# differ in a fresh run.
##          Age Annual.Income Spending.Score
## 1 -0.4408110     0.9891010     1.23640011
## 2 -0.8709130    -0.1135003    -0.09334615
## 3  1.2515802    -0.2396117    -0.04388764
## 4  0.4777583    -1.3049552    -1.19344867
## 5 -0.9735839    -1.3221791     1.03458649
## 6  0.2211606     1.0805138    -1.28682305

# Per-cluster means on the original (unstandardised) scale.
aggregate(data.mall2, by = list(cluster = mall_kmeans$cluster), FUN = mean)

##   cluster      Age Annual.Income Spending.Score
## 1       1 32.69231      86.53846       82.12821
## 2       2 26.68421      57.57895       47.78947
## 3       3 56.33333      54.26667       49.06667
## 4       4 45.52381      26.28571       19.38095
## 5       5 25.25000      25.83333       76.91667
## 6       6 41.93939      88.93939       16.96970

library(readxl)
# Load the running-time workbook from the local disk.
runningtime <- read_xlsx(
  "D:\\MAGISTER (S2)\\ASISTENSI\\Pelatihan\\Pelatihan Kominfo Digitalent 2023\\P3\\Running Time.xlsx"
)

# Spread of every column except the first one.
boxplot(runningtime[, -1])

# Standardise those same columns with scale()'s defaults.
runningtime1 <- scale(runningtime[, -1])

# Seeded k-means with three clusters on the standardised running times.
set.seed(10)
clustering <- kmeans(runningtime1, centers = 3)
fviz_cluster(clustering, data = runningtime1)

# Partitioning around medoids with the same number of clusters, drawn first
# with cluster::clusplot() ...
clustering2 <- pam(runningtime1, k = 3)
clusplot(clustering2)

# ... and then with factoextra for comparison with the k-means plot.
fviz_cluster(clustering2, data = runningtime1)

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Principal components of the standardised running times.
pca <- prcomp(runningtime1)

# Project the data onto the component axes (data matrix times the rotation
# matrix) and tag every row with its k-means cluster as a factor.
runningtimepca <- data.frame(runningtime1 %*% pca$rotation)
runningtimepca[["cluster"]] <- factor(clustering$cluster)

# Interactive plot of the first three components, coloured by cluster.
plot_ly(
  data = runningtimepca,
  x = ~PC1,
  y = ~PC2,
  z = ~PC3,
  color = ~cluster
)

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# Attach each row's k-means cluster label to the original (unscaled) table.
runningtime2 <- cbind(runningtime, cluster = clustering$cluster)
{
  # par() returns the previous values of the settings it changes; keep them
  # so the layout that was active before is restored afterwards, instead of
  # assuming it was a single 1 x 1 panel.
  old_par <- par(mfrow = c(2, 4))

  # One boxplot per event, split by cluster, filling the 2 x 4 grid.
  kolom_lari <- c(
    "100m(s)", "200m(s)", "400m(s)", "800m",
    "1500m", "5000m", "10000m", "Marathon"
  )
  for (kolom in kolom_lari) {
    # Backticks keep the non-syntactic column names valid inside a formula.
    rumus <- as.formula(sprintf("`%s` ~ cluster", kolom))
    boxplot(rumus, data = runningtime2)
  }

  par(old_par)
}

Non-Hierarchical Clustering

Laily Nissa Atul Mualifah

2023-09-15