# Score vectors for six students, one vector per subject. Position i in every
# vector belongs to the i-th entry of `Nama`.
Nama <- c("Andi", "Benny", "Budi", "Ika", "Maya", "Ana")
Mat <- c(8.1, 5.6, 5.2, 6.7, 8.2, 5.7)
Fis <- c(8.3, 6.3, 5.8, 6.8, 8.2, 6.4)
Bio <- c(7.6, 6.1, 5.7, 5.6, 7.4, 5.9)
Sej <- c(6.2, 7.3, 7.0, 7.4, 6.4, 7.1)
Kew <- c(5.8, 7.4, 6.8, 5.3, 5.7, 7.2)
Sos <- c(5.4, 7.6, 7.2, 5.4, 5.5, 7.3)
Sen <- c(6.0, 6.0, 5.7, 7.9, 6.1, 5.8)

# One row per student: the name in column 1, the seven score columns after it.
Ujian <- data.frame(
  Nama = Nama, Mat = Mat, Fis = Fis, Bio = Bio,
  Sej = Sej, Kew = Kew, Sos = Sos, Sen = Sen
)
# Scatter plot of the Mat scores against the Sos scores.
plot(Mat, Sos)

# Side-by-side boxplots of every column of Ujian except the first.
boxplot(Ujian[, -1])

# Pairwise distances between the rows of the score columns, using dist()'s
# default settings, then shown as a full square matrix.
jarak <- dist(Ujian[, -1])
as.matrix(jarak)
## 1 2 3 4 5 6
## 1 0.0000000 4.5945620 4.8207883 3.675595 0.3605551 4.3220366
## 2 4.5945620 0.0000000 1.1269428 3.818377 4.4922155 0.5196152
## 3 4.8207883 1.1269428 0.0000000 3.708099 4.7191101 0.9165151
## 4 3.6755952 3.8183766 3.7080992 0.000000 3.4438351 3.6013886
## 5 0.3605551 4.4922155 4.7191101 3.443835 0.0000000 4.2201896
## 6 4.3220366 0.5196152 0.9165151 3.601389 4.2201896 0.0000000
# Hierarchical clustering on the distance object, merging with average linkage.
klaster <- hclust(jarak, method = "average")

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Dendrogram of the hierarchical clustering result.
fviz_dend(klaster)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.

# k-means with three clusters on every column of Ujian except the first.
# kmeans() picks its starting centres at random, so the seed is fixed first;
# without it the partition and the cluster numbering can change between runs.
set.seed(10)
clus_kmeans <- kmeans(Ujian[, -1], centers = 3)

# Cluster label assigned to each row.
# NOTE: the output recorded below comes from a run made before the seed was
# fixed, so the cluster numbers printed by a fresh run may be permuted.
clus_kmeans$cluster
## [1] 3 1 1 2 3 1

# Mean of every score column within each cluster.
clus_kmeans$centers
## Mat Fis Bio Sej Kew Sos Sen
## 1 5.50 6.166667 5.9 7.133333 7.133333 7.366667 5.833333
## 2 6.70 6.800000 5.6 7.400000 5.300000 5.400000 7.900000
## 3 8.15 8.250000 7.5 6.300000 5.750000 5.450000 6.050000

# Cluster plot of the k-means result drawn from the same score columns.
fviz_cluster(clus_kmeans, data = Ujian[, -1])

library(cluster)

# Partitioning around medoids with three clusters on every column of Ujian
# except the first.
clust_kmedoid <- pam(Ujian[, -1], k = 3)

# Cluster label assigned to each row.
clust_kmedoid$clustering
## [1] 1 2 2 3 1 2

# The rows chosen as cluster representatives (medoids).
clust_kmedoid$medoids
## Mat Fis Bio Sej Kew Sos Sen
## [1,] 8.2 8.2 7.4 6.4 5.7 5.5 6.1
## [2,] 5.7 6.4 5.9 7.1 7.2 7.3 5.8
## [3,] 6.7 6.8 5.6 7.4 5.3 5.4 7.9

# Cluster plot of the PAM result drawn from the same score columns.
fviz_cluster(clust_kmedoid, data = Ujian[, -1])

#clusplot(clust_kmedoid)
# Read the mall customer table from a fixed local path.
data.mall <- read.csv(
  "D:/MAGISTER (S2)/ASISTENSI/Pelatihan/Pelatihan Kominfo Digitalent 2023/P3/Mall_Customers.csv"
)

# Size and column types of the table as loaded.
dim(data.mall)
## [1] 200 5
str(data.mall)
## 'data.frame': 200 obs. of 5 variables:
## $ CustomerID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Genre : chr "Male" "Male" "Female" "Female" ...
## $ Age : int 19 21 20 23 31 22 35 23 64 30 ...
## $ Annual.Income : int 15 15 16 16 17 17 18 18 19 19 ...
## $ Spending.Score: int 39 81 6 77 40 76 6 94 3 72 ...

# Keep columns 3 to 5 only (Age, Annual.Income and Spending.Score in the
# structure printed above) and compare their spreads.
data.mall2 <- data.mall[, 3:5]
boxplot(data.mall2)

# Centre and scale each selected column, compute pairwise distances on the
# scaled values, and cluster hierarchically with hclust()'s default linkage.
data_scale <- scale(data.mall2)
jrk <- dist(data_scale)
aggl <- hclust(jrk)

# Dendrogram of the hierarchical clustering result.
fviz_dend(aggl)

# k-means through eclust() on the standardised columns. No k is supplied here
# and plotting is switched off; the gap statistic stored on the result is
# plotted further down.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
gerombol <- eclust(
  data.mall2,
  stand = TRUE,
  FUNcluster = "kmeans",
  graph = FALSE
)

# Same clustering with the number of clusters fixed at four.
gerombol2 <- eclust(
  data.mall2,
  stand = TRUE,
  k = 4,
  FUNcluster = "kmeans"
)

# Cluster centres of the four-cluster solution.
gerombol2$centers
## Age Annual.Income Spending.Score
## 1 0.03711223 0.9876366 -1.1857814
## 2 1.08344244 -0.4893373 -0.3961802
## 3 -0.96008279 -0.7827991 0.3910484
## 4 -0.42773261 0.9724070 1.2130414

# Gap statistic plot from the run in which k was not supplied.
fviz_gap_stat(gerombol$gap_stat)

# Per-cluster means of the unscaled columns for the four-cluster solution.
aggregate(data.mall2, by = list(cluster = gerombol2$cluster), FUN = mean)
## cluster Age Annual.Income Spending.Score
## 1 1 39.36842 86.50000 19.57895
## 2 2 53.98462 47.70769 39.96923
## 3 3 25.43860 40.00000 60.29825
## 4 4 32.87500 86.10000 81.52500
# k-means with six clusters on the scaled mall data.
# kmeans() picks its starting centres at random, so the seed is fixed first to
# make this step repeatable even when it is run on its own.
set.seed(10)
mall_kmeans <- kmeans(data_scale, centers = 6)

# Cluster centres on the scaled columns.
# NOTE: the outputs recorded below come from a run made before the seed was
# fixed, so a fresh run may number the clusters differently.
mall_kmeans$centers
## Age Annual.Income Spending.Score
## 1 -0.4408110 0.9891010 1.23640011
## 2 -0.8709130 -0.1135003 -0.09334615
## 3 1.2515802 -0.2396117 -0.04388764
## 4 0.4777583 -1.3049552 -1.19344867
## 5 -0.9735839 -1.3221791 1.03458649
## 6 0.2211606 1.0805138 -1.28682305

# Per-cluster means of the unscaled columns.
aggregate(data.mall2, by = list(cluster = mall_kmeans$cluster), FUN = mean)
## cluster Age Annual.Income Spending.Score
## 1 1 32.69231 86.53846 82.12821
## 2 2 26.68421 57.57895 47.78947
## 3 3 56.33333 54.26667 49.06667
## 4 4 45.52381 26.28571 19.38095
## 5 5 25.25000 25.83333 76.91667
## 6 6 41.93939 88.93939 16.96970
library(readxl)

# Read the running-time workbook from a fixed local path.
runningtime <- read_xlsx(
  'D:\\MAGISTER (S2)\\ASISTENSI\\Pelatihan\\Pelatihan Kominfo Digitalent 2023\\P3\\Running Time.xlsx'
)

# Boxplots of every column except the first.
boxplot(runningtime[, -1])

# Centre and scale every column except the first before clustering.
runningtime1 <- scale(runningtime[, -1])

# k-means with three clusters; the seed makes the random start repeatable.
set.seed(10)
clustering <- kmeans(runningtime1, centers = 3)
fviz_cluster(clustering, data = runningtime1)

# Partitioning around medoids with three clusters on the same scaled data,
# drawn with clusplot() and then with fviz_cluster().
clustering2 <- pam(runningtime1, k = 3)
clusplot(clustering2)

fviz_cluster(clustering2, data = runningtime1)

library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout

# Principal components of the scaled running times.
pca <- prcomp(runningtime1)

# Project the scaled data onto the principal axes by multiplying with the
# rotation matrix, then tag each row with its k-means cluster as a factor.
runningtimepca <- data.frame(runningtime1 %*% pca$rotation)
runningtimepca$cluster <- factor(clustering$cluster)

# Interactive plot of the first three components, coloured by cluster.
plot_ly(
  data = runningtimepca,
  x = ~PC1, y = ~PC2, z = ~PC3,
  color = ~cluster
)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# Original table with the k-means cluster label appended as a new column.
runningtime2 <- cbind(runningtime, cluster = clustering$cluster)
{
  # Draw one boxplot per timing column, split by cluster, on a 2 x 4 grid.
  # par() returns the previous values of the settings it changes; they are
  # saved and put back afterwards, so whatever layout was active before this
  # block is restored instead of being forced to 1 x 1.
  old_par <- par(mfrow = c(2, 4))
  boxplot(`100m(s)` ~ cluster, data = runningtime2)
  boxplot(`200m(s)` ~ cluster, data = runningtime2)
  boxplot(`400m(s)` ~ cluster, data = runningtime2)
  boxplot(`800m` ~ cluster, data = runningtime2)
  boxplot(`1500m` ~ cluster, data = runningtime2)
  boxplot(`5000m` ~ cluster, data = runningtime2)
  boxplot(`10000m` ~ cluster, data = runningtime2)
  boxplot(Marathon ~ cluster, data = runningtime2)
  par(old_par)
}
