Persiapan data dan library

Library

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(psych)        # Analisis faktor & psikometri 
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(GPArotation)  #Rotasi faktor 
## Warning: package 'GPArotation' was built under R version 4.4.3
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(factoextra)   # Visualisasi multivariat 
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)        # Manipulasi data
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import Data

# Load the built-in iris dataset into the workspace and preview its first
# six rows.
data("iris")
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Drop the Species label: the analysis below is unsupervised, so only the
# four numeric measurements are kept.
iris_data <- iris[, names(iris) != "Species"]

# Standardise every column to mean 0 and standard deviation 1.
iris_scaled <- scale(iris_data, center = TRUE, scale = TRUE)

Analisis Faktor (PCA)

# Extract as many unrotated principal components as there are variables
# (four, the maximum possible), then print the full solution.
pca <- principal(
  r = iris_scaled,
  nfactors = 4,
  rotate = "none"
)
print(pca)
## Principal Components Analysis
## Call: principal(r = iris_scaled, nfactors = 4, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                PC1  PC2   PC3   PC4 h2       u2 com
## Sepal.Length  0.89 0.36 -0.28 -0.04  1  1.1e-16 1.5
## Sepal.Width  -0.46 0.88  0.09  0.02  1  4.4e-16 1.5
## Petal.Length  0.99 0.02  0.05  0.12  1 -4.4e-16 1.0
## Petal.Width   0.96 0.06  0.24 -0.08  1  0.0e+00 1.1
## 
##                        PC1  PC2  PC3  PC4
## SS loadings           2.92 0.91 0.15 0.02
## Proportion Var        0.73 0.23 0.04 0.01
## Cumulative Var        0.73 0.96 0.99 1.00
## Proportion Explained  0.73 0.23 0.04 0.01
## Cumulative Proportion 0.73 0.96 0.99 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1
# Scree plot: the eigenvalue of each principal component, used to judge how
# many components are worth retaining.
eigen_values <- pca$values

# The upper y-limit is raised by 10% so the value label drawn above the
# largest eigenvalue is not cut off at the top of the plot region.
plot(eigen_values, type = "b", main = "Scree Plot",
     xlab = "Komponen", ylab = "Eigenvalue",
     pch = 16, col = "blue",
     ylim = range(eigen_values) * c(1, 1.1))

# Reference line for the "eigenvalue greater than one" retention rule.
abline(h = 1, col = "red", lty = 2, lwd = 2)

# Print each eigenvalue above its point. seq_along() stays correct even for
# a zero-length vector, where 1:length(x) would yield c(1, 0).
text(seq_along(eigen_values), eigen_values,
     labels = round(eigen_values, 2), pos = 3, cex = 0.8)
legend("topright", legend = "Eigenvalue = 1",
       col = "red", lty = 2, lwd = 2)

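Interpretasi singkat: Berdasarkan kriteria Kaiser, komponen dengan eigenvalue > 1 dianggap signifikan; pada data ini hanya PC1 (eigenvalue 2,92) yang memenuhinya, sedangkan PC2 (0,91) sedikit di bawah batas. Meskipun demikian, dua komponen pertama secara kumulatif menjelaskan sekitar 96% variasi data, sehingga keduanya sudah cukup untuk merangkum data.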

Penentuan Jumlah Klaster Optimal (Elbow Method)

# Elbow method: total within-cluster sum of squares for k = 1, ..., k_max.
# k_max is defined once so the vector length, the loop bounds and the plot
# axis cannot drift apart.
k_max <- 10
wss <- numeric(k_max)
for (k in seq_len(k_max)) {
  # Re-seed before every fit so the result for each k is reproducible on its
  # own, independent of how many fits ran before it.
  set.seed(42)
  km <- kmeans(iris_scaled, centers = k, nstart = 25)
  wss[k] <- km$tot.withinss
}

# frame.plot is spelled out in full: the abbreviated `frame` only works
# through partial argument matching.
plot(seq_len(k_max), wss, type = "b", pch = 19, frame.plot = FALSE,
     xlab = "Jumlah Cluster (k)",
     ylab = "Total Within Sum of Squares",
     main = "Metode Elbow untuk Menentukan k Optimal")

Analisis Gerombol Non-Hierarki (K-Means)

# Final K-means fit with three clusters. The seed is fixed because the
# nstart = 25 random initialisations would otherwise vary between runs.
k <- 3
set.seed(42)
kmeans_result <- kmeans(
  x = iris_scaled,
  centers = k,
  nstart = 25
)

# Show the fitted model (sizes, centres, assignments, sums of squares).
kmeans_result
## K-means clustering with 3 clusters of sizes 50, 53, 47
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  -1.01119138  0.85041372   -1.3006301  -1.2507035
## 2  -0.05005221 -0.88042696    0.3465767   0.2805873
## 3   1.13217737  0.08812645    0.9928284   1.0141287
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 2 2 2 3 2 2 2 2 2 2 2 2 3 2 2 2 2 3 2 2 2
##  [75] 2 3 3 3 2 2 2 2 2 2 2 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 3 3 3 3 3 3 2 2 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 47.35062 44.08754 47.45019
##  (between_SS / total_SS =  76.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Attach each observation's cluster label (as a factor) to the original data.
iris_cluster <- iris
iris_cluster$cluster <- as.factor(kmeans_result$cluster)

# Cross-tabulate the assigned clusters (rows) against the true species
# (columns).
table(iris_cluster[["cluster"]], iris_cluster[["Species"]])
##    
##     setosa versicolor virginica
##   1     50          0         0
##   2      0         39        14
##   3      0         11        36

Visualisasi Hasil Clustering

# Scatter plot of the two petal measurements: colour encodes the K-means
# cluster, point shape encodes the true species.
ggplot(
  data = iris_cluster,
  mapping = aes(
    x = Petal.Length,
    y = Petal.Width,
    color = cluster,
    shape = Species
  )
) +
  geom_point(size = 3) +
  labs(
    title = "Hasil K-Means Clustering pada Data Iris",
    x = "Petal Length",
    y = "Petal Width"
  ) +
  theme_minimal()

# Additional cluster plot from factoextra, with cluster ellipses and centres.
# stand = FALSE because iris_scaled has already been standardised by scale().
fviz_cluster(
  object = kmeans_result,
  data = iris_scaled,
  stand = FALSE,
  geom = "point",
  ellipse = TRUE,
  show.clust.cent = TRUE
) +
  labs(title = "Visualisasi Cluster dengan Factoextra")

# Interpretasi
# - Berdasarkan tabel perbandingan, cluster 1 memuat seluruh (50) Iris setosa dan tidak memuat spesies lain.
# - Cluster 2 dan 3 mencampur versicolor dan virginica (karena keduanya mirip secara morfologi).
# - Dua variabel petal (panjang & lebar mahkota bunga) paling berperan dalam pembentukan cluster.
# - Hasil ini sesuai dengan karakteristik asli spesies iris.