# Load the wholesale-customers data set from the current working directory.
data <- read.csv("Wholesale customers data.csv")
# Keep only the numeric variables.
# vapply() guarantees a logical mask (sapply() may change its return type on
# unusual input), and drop = FALSE keeps a data frame even if only a single
# column turns out to be numeric.
# NOTE(review): Channel and Region look like integer category codes in the
# printed preview; confirm they are meant to enter the correlation,
# covariance and PCA steps that use data_num.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
# Preview the first rows of the numeric subset.
head(data_num)
## Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1 2 3 12669 9656 7561 214 2674 1338
## 2 2 3 7057 9810 9568 1762 3293 1776
## 3 2 3 6353 8808 7684 2405 3516 7844
## 4 1 3 13265 1196 4221 6404 507 1788
## 5 2 3 22615 5410 7198 3915 1777 5185
## 6 2 3 9413 8259 5126 666 1795 1451
# Correlation matrix of every numeric column (cor() defaults to Pearson).
cor_matrix <- cor(x = data_num)
# Show the full matrix.
cor_matrix
## Channel Region Fresh Milk Grocery
## Channel 1.00000000 0.062027619 -0.16917204 0.4607203 0.608792245
## Region 0.06202762 1.000000000 0.05528692 0.0322875 0.007695777
## Fresh -0.16917204 0.055286923 1.00000000 0.1005098 -0.011853875
## Milk 0.46072028 0.032287502 0.10050977 1.0000000 0.728335118
## Grocery 0.60879225 0.007695777 -0.01185387 0.7283351 1.000000000
## Frozen -0.20204596 -0.021044215 0.34588146 0.1239938 -0.040192737
## Detergents_Paper 0.63602637 -0.001482686 -0.10195294 0.6618157 0.924640691
## Delicassen 0.05601143 0.045212107 0.24468997 0.4063683 0.205496511
## Frozen Detergents_Paper Delicassen
## Channel -0.20204596 0.636026367 0.05601143
## Region -0.02104421 -0.001482686 0.04521211
## Fresh 0.34588146 -0.101952938 0.24468997
## Milk 0.12399376 0.661815679 0.40636832
## Grocery -0.04019274 0.924640691 0.20549651
## Frozen 1.00000000 -0.131524906 0.39094747
## Detergents_Paper -0.13152491 1.000000000 0.06929130
## Delicassen 0.39094747 0.069291297 1.00000000
library(corrplot)
## corrplot 0.95 loaded
# Heat map of the correlation matrix: coloured cells, upper triangle only,
# with black variable labels rotated by 45 degrees.
corrplot(
  cor_matrix,
  type = "upper",
  method = "color",
  tl.srt = 45,
  tl.col = "black"
)
Matriks korelasi menunjukkan kekuatan dan arah hubungan linear antarvariabel.
Terlihat bahwa Grocery dan Detergents_Paper berkorelasi sangat kuat (r ≈ 0,92), sedangkan Milk berkorelasi kuat dengan keduanya (r ≈ 0,73 dengan Grocery dan r ≈ 0,66 dengan Detergents_Paper), yang menunjukkan pola pembelian yang saling berkaitan.
# Covariance matrix of the numeric columns on their original,
# unstandardised scale.
cov_matrix <- cov(x = data_num)
# Show the full matrix.
cov_matrix
## Channel Region Fresh Milk
## Channel 2.190723e-01 0.02247877 -1001.431 1.591511e+03
## Region 2.247877e-02 0.59949783 541.396 1.845044e+02
## Fresh -1.001431e+03 541.39599814 159954927.421 9.381789e+06
## Milk 1.591511e+03 184.50443674 9381788.549 5.446997e+07
## Grocery 2.707890e+03 56.62581280 -1424712.796 5.108319e+07
## Frozen -4.590964e+02 -79.10183268 21236654.585 4.442612e+06
## Detergents_Paper 1.419358e+03 -5.47350901 -6147825.712 2.328834e+07
## Delicassen 7.393256e+01 98.72200766 8727309.970 8.457925e+06
## Grocery Frozen Detergents_Paper Delicassen
## Channel 2.707890e+03 -4.590964e+02 1.419358e+03 7.393256e+01
## Region 5.662581e+01 -7.910183e+01 -5.473509e+00 9.872201e+01
## Fresh -1.424713e+06 2.123665e+07 -6.147826e+06 8.727310e+06
## Milk 5.108319e+07 4.442612e+06 2.328834e+07 8.457925e+06
## Grocery 9.031010e+07 -1.854282e+06 4.189519e+07 5.507291e+06
## Frozen -1.854282e+06 2.356785e+07 -3.044325e+06 5.352342e+06
## Detergents_Paper 4.189519e+07 -3.044325e+06 2.273244e+07 9.316807e+05
## Delicassen 5.507291e+06 5.352342e+06 9.316807e+05 7.952997e+06
# Heat map of the covariance matrix. is.corr = FALSE tells corrplot that the
# input is a general matrix, so values outside [-1, 1] are accepted.
corrplot(
  cov_matrix,
  is.corr = FALSE,
  type = "upper",
  method = "color",
  tl.srt = 45,
  tl.col = "black"
)
Variabel Fresh dan Grocery memiliki varians besar, menunjukkan variasi data yang tinggi.
Kovarians positif antara Grocery dan Detergents_Paper menunjukkan bahwa keduanya cenderung meningkat secara bersamaan.
# Eigen decomposition of the (unscaled) covariance matrix.
eigen_res <- eigen(x = cov_matrix)
# Pull the two components out of the result for later use.
eigen_vectors <- eigen_res[["vectors"]]
eigen_values <- eigen_res[["values"]]
# Show the eigenvalues.
eigen_values
## [1] 1.649959e+08 1.454521e+08 2.513998e+07 1.580390e+07 5.392764e+06
## [6] 2.203641e+06 5.956297e-01 1.220552e-01
# Scree plot: eigenvalues against component index, drawn as filled points
# joined by lines.
plot(
  eigen_values,
  main = "Scree Plot Eigen Values",
  xlab = "Komponen",
  ylab = "Nilai Eigen",
  type = "b",
  pch = 19
)
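Nilai eigen menunjukkan besarnya variasi yang dijelaskan oleh masing-masing komponen.
Jika nilai eigen pertama jauh lebih besar dibandingkan yang lain, maka sebagian besar variasi data dijelaskan oleh satu faktor utama. Pada hasil di atas, dua nilai eigen pertama (sekitar 1,65 × 10^8 dan 1,45 × 10^8) hampir sama besar, sehingga variasi tidak didominasi oleh satu komponen saja.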
# PCA on standardised variables: each column is centred (the prcomp default,
# spelled out here) and scaled to unit variance before the decomposition.
pca_res <- prcomp(x = data_num, center = TRUE, scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(object = pca_res)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7607 1.3379 1.0059 0.8593 0.74608 0.67772 0.53021
## Proportion of Variance 0.3875 0.2238 0.1265 0.0923 0.06958 0.05741 0.03514
## Cumulative Proportion 0.3875 0.6112 0.7377 0.8300 0.89960 0.95701 0.99215
## PC8
## Standard deviation 0.25058
## Proportion of Variance 0.00785
## Cumulative Proportion 1.00000
# Biplot of the PCA result (observations and variable arrows), drawn with
# scale = 0 and text shrunk to 60% of the default size.
biplot(
  x = pca_res,
  main = "Biplot PCA - Wholesale Customers",
  cex = 0.6,
  scale = 0
)
Pada biplot, vektor Grocery, Milk, dan Detergents_Paper cenderung mengarah ke arah yang sama (sejalan dengan korelasi ketiganya yang tinggi), menunjukkan bahwa komponen utama pertama merepresentasikan pola pembelian kebutuhan rumah tangga.
Analisis ini membantu memahami pola konsumsi pelanggan secara multivariat.
Dua, D., & Graff, C. (2019). UCI Machine Learning Repository. University of California, Irvine, School of Information and Computer Sciences.
Retrieved from https://archive.ics.uci.edu/dataset/292/wholesale+customers