library(cluster)
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(purrr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
LAMPIRAN 9. PREPROCESSING DATA
# Load the raw survey data from the CSV file
dataraw <- read.csv(file = "AA dataraw.csv", header = TRUE, sep = ",")
# Keep columns 21 to 24 as the variables used for clustering
datavar <- dataraw[21:24]
# Standardise every variable (scale() centres each column and divides it
# by its standard deviation by default)
data_std <- scale(x = datavar)
# Inspect the structure of the raw data
str(dataraw)
## 'data.frame': 400 obs. of 24 variables:
## $ RESPONDEN: chr "RES1" "RES2" "RES3" "RES4" ...
## $ ANGKATAN : int 19 19 19 19 19 19 19 19 19 21 ...
## $ FAKULTAS : chr "FST" "FST" "FST" "FST" ...
## $ P1 : int 4 4 3 4 4 1 3 4 4 3 ...
## $ P2 : int 4 4 3 4 4 2 4 4 4 3 ...
## $ P3 : int 3 3 3 3 3 2 3 4 4 4 ...
## $ P4 : int 3 3 3 3 3 2 3 3 3 3 ...
## $ P5 : int 4 4 3 4 3 2 3 2 3 3 ...
## $ P6 : int 4 3 4 3 3 2 3 2 2 3 ...
## $ P7 : int 4 4 4 4 4 1 3 4 4 3 ...
## $ P8 : int 4 3 4 3 4 1 3 4 4 3 ...
## $ P9 : int 4 2 2 2 3 2 3 1 1 1 ...
## $ P10 : int 4 4 4 4 4 2 4 4 4 4 ...
## $ P11 : int 4 2 2 2 3 2 3 1 1 1 ...
## $ P12 : int 4 2 2 2 4 1 4 1 1 1 ...
## $ P13 : int 4 3 4 3 4 2 3 3 4 3 ...
## $ P14 : int 4 4 4 3 4 1 3 3 1 3 ...
## $ P15 : int 4 3 4 4 4 2 3 3 4 3 ...
## $ P16 : int 4 3 4 4 3 1 4 3 2 3 ...
## $ TOTAL : int 62 51 53 52 57 26 52 46 46 44 ...
## $ V1 : int 4 4 3 4 4 2 3 4 4 3 ...
## $ V2 : int 4 4 4 4 4 2 3 3 3 3 ...
## $ V3 : int 4 3 3 3 4 2 4 2 2 2 ...
## $ V4 : int 4 3 4 4 4 2 3 3 3 3 ...
# Inspect the structure of the selected clustering variables
str(datavar)
## 'data.frame': 400 obs. of 4 variables:
## $ V1: int 4 4 3 4 4 2 3 4 4 3 ...
## $ V2: int 4 4 4 4 4 2 3 3 3 3 ...
## $ V3: int 4 3 3 3 4 2 4 2 2 2 ...
## $ V4: int 4 3 4 4 4 2 3 3 3 3 ...
# Inspect the standardised values; scale() returned a matrix, so it is
# wrapped in a data frame for display
str(data.frame(data_std))
## 'data.frame': 400 obs. of 4 variables:
## $ V1: num 1.026 1.026 -0.404 1.026 1.026 ...
## $ V2: num 0.923 0.923 0.923 0.923 0.923 ...
## $ V3: num 2.051 0.802 0.802 0.802 2.051 ...
## $ V4: num 1.063 -0.408 1.063 1.063 1.063 ...
LAMPIRAN 10. K-MEANS CLUSTERING DAN PLOT
- Hasil K-Means tidak konstan (berbeda dari hasil di laporan)
# Smallest and largest number of clusters to evaluate
minclus <- 2
maxclus <- 9
# kmeans() starts from randomly chosen centres, so without a fixed seed the
# cluster sizes and labels change on every run. Fix the seed so the tables
# and figures can be reproduced.
set.seed(123)
# List that stores the cluster-size table of every run
vect1_size <- vector("list", length = maxclus - minclus + 1)
for (k in minclus:maxclus) {
  # K-Means with k clusters; nstart = 25 tries 25 random initialisations and
  # keeps the best one, which makes the solution far less dependent on a
  # single unlucky start
  forresult1 <- kmeans(data_std, centers = k, nstart = 25)
  # Store the cluster sizes
  vect1_size[[k - minclus + 1]] <- table(forresult1$cluster)
  # Keep the fitted model as kmeans_c<k> (used by later sections)
  assign(paste0("kmeans_c", k), forresult1)
  # Build the cluster plot and keep it as kmeans_v<k>
  formain1 <- paste("Kmeans (C=", k, ")")
  forvis1 <- fviz_cluster(
    forresult1,
    data = data_std,
    main = formain1,
    xlab = FALSE,
    ylab = FALSE,
    labelsize = 0
  )
  assign(paste0("kmeans_v", k), forvis1)
}
# Show the data for Table 17. K-Means clustering results
# (one row per k, one column per cluster label, NA where the label is unused)
tabel1_size <- bind_rows(vect1_size)
finalt_kmeans <- as.data.frame(tabel1_size)
finalt_kmeans
## 1 2 3 4 5 6 7 8 9
## 1 193 207 NA NA NA NA NA NA NA
## 2 161 124 115 NA NA NA NA NA NA
## 3 70 149 23 158 NA NA NA NA NA
## 4 139 47 56 73 85 NA NA NA NA
## 5 140 83 21 25 122 9 NA NA NA
## 6 114 7 26 126 6 73 48 NA NA
## 7 47 10 41 33 26 99 3 141 NA
## 8 112 90 53 15 6 99 13 10 2
# Show the plots for Figure 9. K-Means visualisation
grid.arrange(grobs = list(kmeans_v2, kmeans_v3, kmeans_v4, kmeans_v5), ncol = 2)

grid.arrange(grobs = list(kmeans_v6, kmeans_v7), ncol = 2)

grid.arrange(grobs = list(kmeans_v8, kmeans_v9), ncol = 2)

LAMPIRAN 11. K-MEDOIDS CLUSTERING DAN PLOT
# List that stores the cluster-size table of every K-Medoids run
vect2_size <- vector(mode = "list", length = length(minclus:maxclus))
for (k in minclus:maxclus) {
  # K-Medoids (PAM) with k clusters
  forresult2 <- pam(data_std, k)
  # Store the cluster sizes
  vect2_size[[k - minclus + 1]] <- table(forresult2[["clustering"]])
  # Keep the fitted model as kmedoids_c<k>
  assign(paste0("kmedoids_c", k), forresult2)
  # Build the cluster plot and keep it as kmedoids_v<k>
  formain2 <- paste("Kmedoids (C=", k, ")", sep = " ")
  forvis2 <- fviz_cluster(
    forresult2,
    data = data_std,
    main = formain2,
    xlab = FALSE,
    ylab = FALSE,
    labelsize = 0
  )
  assign(paste0("kmedoids_v", k), forvis2)
}
# Show the data for Table 18. K-Medoids clustering results
tabel2_size <- bind_rows(vect2_size)
finalt_kmedoids <- as.data.frame(tabel2_size)
finalt_kmedoids
## 1 2 3 4 5 6 7 8 9
## 1 156 244 NA NA NA NA NA NA NA
## 2 83 238 79 NA NA NA NA NA NA
## 3 80 160 81 79 NA NA NA NA NA
## 4 80 30 73 138 79 NA NA NA NA
## 5 80 30 60 115 52 63 NA NA NA
## 6 80 30 49 30 96 52 63 NA NA
## 7 24 56 30 49 30 96 52 63 NA
## 8 24 22 50 30 49 30 96 36 63
# Show the plots for Figure 10. K-Medoids visualisation
grid.arrange(grobs = list(kmedoids_v2, kmedoids_v3, kmedoids_v4, kmedoids_v5), ncol = 2)

grid.arrange(grobs = list(kmedoids_v6, kmedoids_v7), ncol = 2)

grid.arrange(grobs = list(kmedoids_v8, kmedoids_v9), ncol = 2)

LAMPIRAN 12. UJI VALIDITAS
# The K-Means runs inside fviz_nbclust() start from random centres and the
# gap statistic is estimated from bootstrap reference sets, so the curves
# differ between runs unless the seed is fixed.
set.seed(123)
# Optimal-number-of-clusters plots for K-Means; nstart is forwarded to
# kmeans() so each k is fitted from 25 random starts instead of one
opt1_sil <- fviz_nbclust(data_std, kmeans, method = "silhouette", nstart = 25)
opt1_elb <- fviz_nbclust(data_std, kmeans, method = "wss", nstart = 25)
opt1_gap <- fviz_nbclust(data_std, kmeans, method = "gap_stat", nstart = 25)
# Optimal-number-of-clusters plots for K-Medoids
opt2_sil <- fviz_nbclust(data_std, pam, method = "silhouette")
opt2_elb <- fviz_nbclust(data_std, pam, method = "wss")
opt2_gap <- fviz_nbclust(data_std, pam, method = "gap_stat")
# Show the plots for each validity method, K-Means on the left and
# K-Medoids on the right
# Figure 11. Elbow method
grid.arrange(opt1_elb, opt2_elb, ncol = 2)

# Figure 12. Gap statistic
grid.arrange(opt1_gap, opt2_gap, ncol = 2)

# Figure 13. Silhouette score
grid.arrange(opt1_sil, opt2_sil, ncol = 2)

LAMPIRAN 13. VISUALISASI KLASTERISASI C=5
# Figure 14. Visualisation of the chosen clustering (C = 5),
# K-Means next to K-Medoids
grid.arrange(grobs = list(kmeans_v5, kmedoids_v5), ncol = 2)

LAMPIRAN 14. CENTROID K-MEANS
- Hasil K-Means tidak konstan (berbeda dari hasil di laporan)
- Di laporan urutan klaster berdasarkan rata-rata
# Take the stored K-Means result for C = 5
kmeans_final <- kmeans_c5
# Cluster sizes of the C = 5 solution
sizecentroid <- data.frame(size = kmeans_final[["size"]])
# Cluster centres on the original (unstandardised) scale: the mean of every
# variable within each cluster
kmeans_centroid <- datavar %>%
  mutate(Klaster = kmeans_final[["cluster"]]) %>%
  group_by(Klaster) %>%
  summarise(
    V1 = mean(V1),
    V2 = mean(V2),
    V3 = mean(V3),
    V4 = mean(V4)
  )
# Show Table 20. K-Means centroids (original scale)
finalt_centroid <- cbind(kmeans_centroid, sizecentroid)
finalt_centroid
## Klaster V1 V2 V3 V4 size
## 1 1 2.661871 2.733813 2.338129 2.748201 139
## 2 2 2.957447 3.765957 2.638298 3.702128 47
## 3 3 3.821429 3.821429 1.642857 4.000000 56
## 4 4 3.479452 3.506849 1.643836 2.876712 73
## 5 5 3.952941 3.752941 3.317647 3.776471 85
LAMPIRAN 15. MEDOIDS K-MEDOIDS
- Di laporan urutan klaster berdasarkan rata-rata
# Take the stored K-Medoids result for C = 5
kmedoids_final <- kmedoids_c5
# Cluster sizes: first column of the clusinfo matrix
sizemedoids <- data.frame(size = kmedoids_final[["clusinfo"]][, 1])
# Row numbers of the observations chosen as medoids
kmedoids_medoids <- data.frame(responden = kmedoids_final[["id.med"]])
kmedoids_medoids
## responden
## 1 399
## 2 333
## 3 393
## 4 400
## 5 385
# Show Table 22. K-Medoids medoids
# The medoid rows are looked up through id.med instead of row numbers copied
# from an earlier printout, so the table stays correct if the data or the
# clustering changes.
finalt_medoids <- cbind(
  kmedoids_medoids,
  datavar[kmedoids_final$id.med, ],
  sizemedoids
)
finalt_medoids
## responden V1 V2 V3 V4 size
## 399 399 4 4 3 4 80
## 333 333 2 2 2 2 30
## 393 393 3 3 3 3 73
## 400 400 3 3 2 3 138
## 385 385 4 4 2 4 79
LAMPIRAN 16. HASIL KLASTERISASI K-MEANS DAN K-MEDOIDS
# Attach both cluster labels to the collected raw data
# (dataraw is shown in Appendix 9); only the first 19 columns are kept
final_data <- cbind(
  dataraw[, seq_len(19)],
  kmeans = kmeans_final[["cluster"]],
  kmedoids = kmedoids_final[["clustering"]]
)
str(final_data)
## 'data.frame': 400 obs. of 21 variables:
## $ RESPONDEN: chr "RES1" "RES2" "RES3" "RES4" ...
## $ ANGKATAN : int 19 19 19 19 19 19 19 19 19 21 ...
## $ FAKULTAS : chr "FST" "FST" "FST" "FST" ...
## $ P1 : int 4 4 3 4 4 1 3 4 4 3 ...
## $ P2 : int 4 4 3 4 4 2 4 4 4 3 ...
## $ P3 : int 3 3 3 3 3 2 3 4 4 4 ...
## $ P4 : int 3 3 3 3 3 2 3 3 3 3 ...
## $ P5 : int 4 4 3 4 3 2 3 2 3 3 ...
## $ P6 : int 4 3 4 3 3 2 3 2 2 3 ...
## $ P7 : int 4 4 4 4 4 1 3 4 4 3 ...
## $ P8 : int 4 3 4 3 4 1 3 4 4 3 ...
## $ P9 : int 4 2 2 2 3 2 3 1 1 1 ...
## $ P10 : int 4 4 4 4 4 2 4 4 4 4 ...
## $ P11 : int 4 2 2 2 3 2 3 1 1 1 ...
## $ P12 : int 4 2 2 2 4 1 4 1 1 1 ...
## $ P13 : int 4 3 4 3 4 2 3 3 4 3 ...
## $ P14 : int 4 4 4 3 4 1 3 3 1 3 ...
## $ P15 : int 4 3 4 4 4 2 3 3 4 3 ...
## $ P16 : int 4 3 4 4 3 1 4 3 2 3 ...
## $ kmeans : int 5 5 2 5 5 1 1 4 4 1 ...
## $ kmedoids : int 1 1 1 1 1 2 3 4 4 4 ...
LAMPIRAN 17. SILHOUETTE SCORE C = 5
- Hasil K-Means tidak konstan (berbeda dari hasil di laporan)
# Pairwise distances on the standardised data, computed once and shared by
# both silhouette calculations
dist_std <- dist(data_std)
# Average silhouette width of the K-Means solution with C = 5
silresult1 <- mean(silhouette(kmeans_c5$cluster, dist_std)[, "sil_width"])
# Average silhouette width of the K-Medoids solution with C = 5.
# pam() stores its labels in `clustering`; the previous `$cluster` only
# worked through partial matching of `$`, which is fragile and warns when
# options(warnPartialMatchDollar = TRUE) is set.
silresult2 <- mean(silhouette(kmedoids_c5$clustering, dist_std)[, "sil_width"])
# Silhouette scores
cat("Silhouette score K-Means C = 5 adalah ", silresult1, "\nSilhouette score K-Medoids C = 5 adalah ", silresult2)
## Silhouette score K-Means C = 5 adalah 0.3000067
## Silhouette score K-Medoids C = 5 adalah 0.3666158