library(psych)
library(dplyr)
library(FactoMineR)
library(factoextra)
library(GPArotation)
library(reshape2)
library(ggplot2)
Dataset yang digunakan dalam penelitian ini adalah Spotify Recommendation Dataset yang diperoleh dari platform Kaggle dan dapat diakses melalui tautan berikut, https://www.kaggle.com/datasets/bricevergnou/spotify-recommendation.
Dimana dataset ini memiliki 13 variabel, yaitu 12 variabel fitur audio seperti acousticness, danceability, energy, tempo, valence, dan lainnya, serta 1 variabel target bernama “liked” yang bersifat biner, yaitu 1 (disukai) dan 0 (tidak disukai).
# Import Data
data <- read.csv("data.csv")
knitr::kable(head(data))
| danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | duration_ms | time_signature | liked |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.803 | 0.6240 | 7 | -6.764 | 0 | 0.0477 | 0.4510 | 7.34e-04 | 0.1000 | 0.6280 | 95.968 | 304524 | 4 | 0 |
| 0.762 | 0.7030 | 10 | -7.951 | 0 | 0.3060 | 0.2060 | 0.00e+00 | 0.0912 | 0.5190 | 151.329 | 247178 | 4 | 1 |
| 0.261 | 0.0149 | 1 | -27.528 | 1 | 0.0419 | 0.9920 | 8.97e-01 | 0.1020 | 0.0382 | 75.296 | 286987 | 4 | 0 |
| 0.722 | 0.7360 | 3 | -6.994 | 0 | 0.0585 | 0.4310 | 1.20e-06 | 0.1230 | 0.5820 | 89.860 | 208920 | 4 | 1 |
| 0.787 | 0.5720 | 1 | -7.516 | 1 | 0.2220 | 0.1450 | 0.00e+00 | 0.0753 | 0.6470 | 155.117 | 179413 | 4 | 1 |
| 0.778 | 0.6320 | 8 | -6.415 | 1 | 0.1250 | 0.0404 | 0.00e+00 | 0.0912 | 0.8270 | 140.951 | 224029 | 4 | 1 |
# Struktur dan Tipe Data
str(data)
## 'data.frame': 195 obs. of 14 variables:
## $ danceability : num 0.803 0.762 0.261 0.722 0.787 0.778 0.666 0.922 0.794 0.853 ...
## $ energy : num 0.624 0.703 0.0149 0.736 0.572 0.632 0.589 0.712 0.659 0.668 ...
## $ key : int 7 10 1 3 1 8 0 7 7 3 ...
## $ loudness : num -6.76 -7.95 -27.53 -6.99 -7.52 ...
## $ mode : int 0 0 1 0 1 1 0 1 0 1 ...
## $ speechiness : num 0.0477 0.306 0.0419 0.0585 0.222 0.125 0.324 0.171 0.0498 0.447 ...
## $ acousticness : num 0.451 0.206 0.992 0.431 0.145 0.0404 0.555 0.0779 0.143 0.263 ...
## $ instrumentalness: num 7.34e-04 0.00 8.97e-01 1.18e-06 0.00 0.00 0.00 3.96e-05 2.24e-03 0.00 ...
## $ liveness : num 0.1 0.0912 0.102 0.123 0.0753 0.0912 0.114 0.175 0.0944 0.104 ...
## $ valence : num 0.628 0.519 0.0382 0.582 0.647 0.827 0.776 0.904 0.308 0.745 ...
## $ tempo : num 96 151.3 75.3 89.9 155.1 ...
## $ duration_ms : int 304524 247178 286987 208920 179413 224029 146053 161800 247460 165363 ...
## $ time_signature : int 4 4 4 4 4 4 4 4 4 4 ...
## $ liked : int 0 1 0 1 1 1 1 1 0 1 ...
Untuk mengetahui distribusi data, sehingga dapat dilakukan analisis lanjutan
summary(data)
## danceability energy key loudness
## Min. :0.1300 Min. :0.0024 Min. : 0.000 Min. :-42.261
## 1st Qu.:0.4625 1st Qu.:0.5335 1st Qu.: 2.000 1st Qu.: -9.962
## Median :0.7050 Median :0.6590 Median : 6.000 Median : -7.766
## Mean :0.6367 Mean :0.6384 Mean : 5.497 Mean : -9.482
## 3rd Qu.:0.7990 3rd Qu.:0.8375 3rd Qu.: 8.000 3rd Qu.: -5.829
## Max. :0.9460 Max. :0.9960 Max. :11.000 Max. : -2.336
## mode speechiness acousticness instrumentalness
## Min. :0.0000 Min. :0.0278 Min. :3.050e-06 Min. :0.0000000
## 1st Qu.:0.0000 1st Qu.:0.0568 1st Qu.:4.220e-02 1st Qu.:0.0000000
## Median :1.0000 Median :0.0962 Median :2.130e-01 Median :0.0000076
## Mean :0.5385 Mean :0.1490 Mean :3.191e-01 Mean :0.1923373
## 3rd Qu.:1.0000 3rd Qu.:0.2305 3rd Qu.:5.040e-01 3rd Qu.:0.0975000
## Max. :1.0000 Max. :0.5400 Max. :9.950e-01 Max. :0.9690000
## liveness valence tempo duration_ms
## Min. :0.0331 Min. :0.0353 Min. : 60.17 Min. : 77203
## 1st Qu.:0.0840 1st Qu.:0.2690 1st Qu.:100.24 1st Qu.:178301
## Median :0.1050 Median :0.5250 Median :124.90 Median :204000
## Mean :0.1485 Mean :0.4936 Mean :121.09 Mean :213409
## 3rd Qu.:0.1770 3rd Qu.:0.7175 3rd Qu.:142.46 3rd Qu.:242374
## Max. :0.6330 Max. :0.9800 Max. :180.04 Max. :655213
## time_signature liked
## Min. :1.000 Min. :0.0000
## 1st Qu.:4.000 1st Qu.:0.0000
## Median :4.000 Median :1.0000
## Mean :3.913 Mean :0.5128
## 3rd Qu.:4.000 3rd Qu.:1.0000
## Max. :5.000 Max. :1.0000
par(mar = c(8,4,4,2))
boxplot(data[, c("danceability","energy","key","loudness","mode","speechiness",
"acousticness","instrumentalness",
"liveness","valence","tempo","duration_ms","time_signature","liked")],
col="lightblue",
las=2,
main="Boxplot Variabel")
sum(is.na(data))
## [1] 0
kolom_dihapus <- c(
"key",
"mode",
"time_signature",
"liked"
)
data_clean <- data[, !(names(data) %in% kolom_dihapus)]
knitr::kable(head(data_clean))
| danceability | energy | loudness | speechiness | acousticness | instrumentalness | liveness | valence | tempo | duration_ms |
|---|---|---|---|---|---|---|---|---|---|
| 0.803 | 0.6240 | -6.764 | 0.0477 | 0.4510 | 7.34e-04 | 0.1000 | 0.6280 | 95.968 | 304524 |
| 0.762 | 0.7030 | -7.951 | 0.3060 | 0.2060 | 0.00e+00 | 0.0912 | 0.5190 | 151.329 | 247178 |
| 0.261 | 0.0149 | -27.528 | 0.0419 | 0.9920 | 8.97e-01 | 0.1020 | 0.0382 | 75.296 | 286987 |
| 0.722 | 0.7360 | -6.994 | 0.0585 | 0.4310 | 1.20e-06 | 0.1230 | 0.5820 | 89.860 | 208920 |
| 0.787 | 0.5720 | -7.516 | 0.2220 | 0.1450 | 0.00e+00 | 0.0753 | 0.6470 | 155.117 | 179413 |
| 0.778 | 0.6320 | -6.415 | 0.1250 | 0.0404 | 0.00e+00 | 0.0912 | 0.8270 | 140.951 | 224029 |
colnames(data_clean) <- paste0("X", 1:ncol(data_clean))
knitr::kable(head(data_clean))
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 |
|---|---|---|---|---|---|---|---|---|---|
| 0.803 | 0.6240 | -6.764 | 0.0477 | 0.4510 | 7.34e-04 | 0.1000 | 0.6280 | 95.968 | 304524 |
| 0.762 | 0.7030 | -7.951 | 0.3060 | 0.2060 | 0.00e+00 | 0.0912 | 0.5190 | 151.329 | 247178 |
| 0.261 | 0.0149 | -27.528 | 0.0419 | 0.9920 | 8.97e-01 | 0.1020 | 0.0382 | 75.296 | 286987 |
| 0.722 | 0.7360 | -6.994 | 0.0585 | 0.4310 | 1.20e-06 | 0.1230 | 0.5820 | 89.860 | 208920 |
| 0.787 | 0.5720 | -7.516 | 0.2220 | 0.1450 | 0.00e+00 | 0.0753 | 0.6470 | 155.117 | 179413 |
| 0.778 | 0.6320 | -6.415 | 0.1250 | 0.0404 | 0.00e+00 | 0.0912 | 0.8270 | 140.951 | 224029 |
Masing-masing variabel memiliki korelasi dan tidak bernilai 0
cor(data_clean)
## X1 X2 X3 X4 X5 X6
## X1 1.0000000 0.1371879 0.45507806 0.38859635 -0.23417647 -0.80705280
## X2 0.1371879 1.0000000 0.81356741 0.12282548 -0.77258324 -0.24144369
## X3 0.4550781 0.8135674 1.00000000 0.27971003 -0.66498947 -0.53826636
## X4 0.3885964 0.1228255 0.27971003 1.00000000 -0.07971033 -0.34324228
## X5 -0.2341765 -0.7725832 -0.66498947 -0.07971033 1.00000000 0.29432036
## X6 -0.8070528 -0.2414437 -0.53826636 -0.34324228 0.29432036 1.00000000
## X7 -0.1370687 0.1665082 0.07809341 -0.00666515 -0.14098782 0.05573045
## X8 0.6123443 0.3194087 0.36353233 0.18070753 -0.31380646 -0.57222362
## X9 0.2235217 0.2149051 0.27446222 0.31391823 -0.25509744 -0.29949290
## X10 -0.2326207 -0.1345274 -0.20633365 -0.38839658 0.13879339 0.24968279
## X7 X8 X9 X10
## X1 -0.13706866 0.61234430 0.22352170 -0.2326207
## X2 0.16650819 0.31940867 0.21490513 -0.1345274
## X3 0.07809341 0.36353233 0.27446222 -0.2063337
## X4 -0.00666515 0.18070753 0.31391823 -0.3883966
## X5 -0.14098782 -0.31380646 -0.25509744 0.1387934
## X6 0.05573045 -0.57222362 -0.29949290 0.2496828
## X7 1.00000000 -0.01300429 -0.01055516 -0.1439661
## X8 -0.01300429 1.00000000 0.21801724 -0.1148418
## X9 -0.01055516 0.21801724 1.00000000 -0.2562498
## X10 -0.14396607 -0.11484180 -0.25624981 1.0000000
corrplot::corrplot(cor(data_clean), tl.col = "black", tl.srt = 45, tl.cex = 0.5)
KMO/MSA memilki nilai > 0.5
r <- cor(data_clean)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.71
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 0.67 0.58 0.70 0.76 0.80 0.79 0.60 0.71 0.81 0.77
Dengan syarat hasil p-value < 0.05
bartlett.test(data_clean)
##
## Bartlett test of homogeneity of variances
##
## data: data_clean
## Bartlett's K-squared = 36677, df = 9, p-value < 2.2e-16
Principal Component Analysis (PCA) merupakan teknik reduksi dimensi yang melalukan transformasi variabel asli menjadi sejumlah komponen utama baru yang tidak berkorelasi
Data diubah agar memiliki skala yang sama, sehingga semua variabel memiliki kontribusi yang seimbang
Z <- scale(data_clean)
Melakukan PCA untuk mendapatkan hasil loading dan eigen value, sehingga walaupun komponen yang dihasilkan sedikit tetapi informasi yang dimiliki tidak hilang
pca_full <- principal(Z,
nfactors = ncol(Z),
rotate = "none")
pca_full
## Principal Components Analysis
## Call: principal(r = Z, nfactors = ncol(Z), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 h2 u2 com
## X1 0.73 -0.51 -0.23 0.18 -0.05 0.02 -0.15 -0.12 0.28 0.08 1 0.0e+00 2.8
## X2 0.69 0.65 -0.05 -0.12 -0.11 0.02 0.11 0.17 -0.02 0.20 1 2.2e-15 2.5
## X3 0.84 0.34 -0.08 -0.09 -0.19 0.09 -0.18 0.24 0.09 -0.17 1 6.7e-16 2.0
## X4 0.48 -0.39 0.49 -0.15 -0.27 0.48 0.23 -0.07 -0.05 -0.01 1 1.1e-16 5.2
## X5 -0.68 -0.57 0.08 0.11 0.01 0.09 -0.03 0.43 0.04 0.05 1 1.2e-15 2.8
## X6 -0.78 0.39 0.19 -0.17 -0.02 0.00 0.31 -0.01 0.29 -0.03 1 -1.3e-15 2.5
## X7 0.06 0.41 0.45 0.71 0.26 0.20 -0.08 -0.01 0.03 0.00 1 8.9e-16 3.0
## X8 0.66 -0.22 -0.34 0.27 0.25 -0.11 0.49 0.09 -0.03 -0.06 1 -8.9e-16 3.7
## X9 0.48 -0.11 0.35 -0.46 0.65 0.00 -0.08 0.04 0.04 0.01 1 6.7e-16 3.5
## X10 -0.40 0.18 -0.68 -0.08 0.23 0.53 -0.03 -0.03 0.00 0.00 1 2.2e-16 3.1
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 3.81 1.69 1.25 0.91 0.73 0.59 0.47 0.30 0.17 0.08
## Proportion Var 0.38 0.17 0.12 0.09 0.07 0.06 0.05 0.03 0.02 0.01
## Cumulative Var 0.38 0.55 0.68 0.77 0.84 0.90 0.94 0.97 0.99 1.00
## Proportion Explained 0.38 0.17 0.12 0.09 0.07 0.06 0.05 0.03 0.02 0.01
## Cumulative Proportion 0.38 0.55 0.68 0.77 0.84 0.90 0.94 0.97 0.99 1.00
##
## Mean item complexity = 3.1
## Test of the hypothesis that 10 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
Untuk menentukan komponen PCA yang paling memiliki pengaruh
pca_vis <- prcomp(Z, scale. = FALSE)
fviz_eig(pca_vis,
addlabels = TRUE,
barfill = "skyblue",
barcolor = "darkblue",
linecolor = "red")
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
pca_result <- principal(Z,
nfactors = 4,
rotate = "none")
pca_result
## Principal Components Analysis
## Call: principal(r = Z, nfactors = 4, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 h2 u2 com
## X1 0.73 -0.51 -0.23 0.18 0.88 0.123 2.2
## X2 0.69 0.65 -0.05 -0.12 0.91 0.093 2.1
## X3 0.84 0.34 -0.08 -0.09 0.82 0.175 1.4
## X4 0.48 -0.39 0.49 -0.15 0.63 0.366 3.1
## X5 -0.68 -0.57 0.08 0.11 0.80 0.197 2.0
## X6 -0.78 0.39 0.19 -0.17 0.82 0.179 1.7
## X7 0.06 0.41 0.45 0.71 0.88 0.116 2.4
## X8 0.66 -0.22 -0.34 0.27 0.68 0.323 2.2
## X9 0.48 -0.11 0.35 -0.46 0.57 0.428 3.0
## X10 -0.40 0.18 -0.68 -0.08 0.66 0.338 1.8
##
## PC1 PC2 PC3 PC4
## SS loadings 3.81 1.69 1.25 0.91
## Proportion Var 0.38 0.17 0.12 0.09
## Cumulative Var 0.38 0.55 0.68 0.77
## Proportion Explained 0.50 0.22 0.16 0.12
## Cumulative Proportion 0.50 0.72 0.88 1.00
##
## Mean item complexity = 2.2
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 52.99 with prob < 1.8e-07
##
## Fit based upon off diagonal values = 0.98
Fungsi principal() memberikan loading dan bukan eigenvector, sehingga untuk mendapatkan hasil final skor PCA dibutuhkan eigenvector agar sesuai dengan rumus aslinya
L <- as.matrix(pca_result$loadings)
lambda <- pca_result$values[1:4]
V <- sweep(L, 2, sqrt(lambda), "/")
V
##
## Loadings:
## PC1 PC2 PC3 PC4
## X1 0.372 -0.395 -0.209 0.187
## X2 0.351 0.498 -0.126
## X3 0.428 0.259
## X4 0.245 -0.296 0.435 -0.157
## X5 -0.349 -0.436 0.113
## X6 -0.397 0.304 0.167 -0.178
## X7 0.319 0.405 0.743
## X8 0.339 -0.171 -0.305 0.286
## X9 0.244 0.313 -0.480
## X10 -0.203 0.142 -0.610
##
## PC1 PC2 PC3 PC4
## SS loadings 1.0 1.0 1.0 1.0
## Proportion Var 0.1 0.1 0.1 0.1
## Cumulative Var 0.1 0.2 0.3 0.4
Skor menunjukkan posisi setiap objek pada komponen utama, dimana ketika memiliki nilai tinggi maka objek punya karakteristik tertentu
scores <- Z %*% V
head(scores)
## PC1 PC2 PC3 PC4
## [1,] -0.003877716 -0.29701663 -2.00275192 0.5130620
## [2,] 1.247996752 -0.66125321 0.10228409 -1.0215715
## [3,] -5.621553101 -0.82116628 -0.01689206 0.1259920
## [4,] 0.202192105 -0.01384421 -1.02648135 0.7033173
## [5,] 1.418859682 -0.92545428 0.18695078 -0.8128683
## [6,] 1.457208404 -0.30579038 -0.88004258 -0.2837756
fviz_pca_biplot(pca_vis,
geom.ind = "point",
addEllipses = TRUE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Interpretasi :
p1 <- fviz_contrib(pca_vis, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
p2 <- fviz_contrib(pca_vis, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
p3 <- fviz_contrib(pca_vis, choice = "var", axes = 3, top = 5) + ggtitle("PC3")
p4 <- fviz_contrib(pca_vis, choice = "var", axes = 4, top = 5) + ggtitle("PC4")
gridExtra::grid.arrange(p1, p2, p3, p4, nrow = 1)
Interpretasi :
Factor Analysis (FA) adalah teknik statistik untuk Mencari faktor laten (tersembunyi) yang menyebabkan variabel-variabel saling berkorelasi.
Menentukan jumlah faktor yang optimal dengan membandingkan eigenvalue pada data aktual, data simulasi acak, dan data hasil resampling.
fa.parallel(Z, fm = "pa", fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
Hasilnya menunjukkan jumlah faktor yang layak dipertahankan dalam analisis adalah 3 faktor, karena hanya tiga faktor tersebut yang memiliki eigenvalue lebih besar dari eigenvalue data simulasi.
fa_pa <- fa(Z,
nfactors = 3,
rotate = "none",
fm = "pa")
fa_pa
## Factor Analysis using method = pa
## Call: fa(r = Z, nfactors = 3, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 PA3 h2 u2 com
## X1 0.74 -0.58 -0.21 0.925 0.07452 2.1
## X2 0.73 0.69 -0.03 0.999 0.00084 2.0
## X3 0.82 0.29 -0.02 0.760 0.23951 1.2
## X4 0.41 -0.27 0.44 0.444 0.55623 2.7
## X5 -0.65 -0.46 0.06 0.632 0.36778 1.8
## X6 -0.75 0.41 0.13 0.754 0.24596 1.6
## X7 0.05 0.21 0.16 0.072 0.92841 2.0
## X8 0.59 -0.20 -0.22 0.436 0.56399 1.5
## X9 0.39 -0.06 0.24 0.214 0.78589 1.7
## X10 -0.34 0.13 -0.51 0.387 0.61339 1.9
##
## PA1 PA2 PA3
## SS loadings 3.52 1.45 0.65
## Proportion Var 0.35 0.15 0.07
## Cumulative Var 0.35 0.50 0.56
## Proportion Explained 0.63 0.26 0.12
## Cumulative Proportion 0.63 0.88 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 45 with the objective function = 5.06 with Chi Square = 960.01
## df of the model are 18 and the objective function was 0.28
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 195 with the empirical chi square 6.6 with prob < 0.99
## The total n.obs was 195 with Likelihood Chi Square = 53.07 with prob < 2.6e-05
##
## Tucker Lewis Index of factoring reliability = 0.903
## RMSEA index = 0.1 and the 90 % confidence intervals are 0.07 0.132
## BIC = -41.84
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA1 PA2 PA3
## Correlation of (regression) scores with factors 0.98 0.98 0.75
## Multiple R square of scores with factors 0.97 0.97 0.56
## Minimum correlation of possible factor scores 0.93 0.93 0.11
Pada tahap ekstraksi tanpa rotasi menunjukkan bahwa dua faktor utama memiliki eigenvalue > 1, sedangkan faktor ketiga memiliki eigenvalue < 1 sehingga kontribusinya relatif kecil. Ketiga faktor mampu menjelaskan 56% total variansi data. Nilai RMSR sebesar 0,03 menunjukkan model cukup baik, namun nilai RMSEA sebesar 0,10 mengindikasikan bahwa model masih kurang optimal. Selain itu, beberapa variabel seperti X7 dan X9 memiliki nilai communalities yang rendah sehingga kurang representatif dalam pembentukan faktor.
fa_varimax <- fa(Z,
nfactors = 3,
rotate = "varimax",
fm = "pa",
scores = TRUE)
fa_varimax
## Factor Analysis using method = pa
## Call: fa(r = Z, nfactors = 3, rotate = "varimax", scores = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA1 PA3 h2 u2 com
## X1 0.06 0.92 0.28 0.925 0.07452 1.2
## X2 0.99 0.07 0.08 0.999 0.00084 1.0
## X3 0.75 0.36 0.24 0.760 0.23951 1.7
## X4 0.03 0.21 0.63 0.444 0.55623 1.2
## X5 -0.77 -0.17 -0.09 0.632 0.36778 1.1
## X6 -0.19 -0.79 -0.31 0.754 0.24596 1.4
## X7 0.18 -0.17 0.10 0.072 0.92841 2.5
## X8 0.25 0.60 0.11 0.436 0.56399 1.4
## X9 0.19 0.16 0.39 0.214 0.78589 1.8
## X10 -0.09 -0.04 -0.61 0.387 0.61339 1.0
##
## PA2 PA1 PA3
## SS loadings 2.33 2.10 1.20
## Proportion Var 0.23 0.21 0.12
## Cumulative Var 0.23 0.44 0.56
## Proportion Explained 0.41 0.37 0.21
## Cumulative Proportion 0.41 0.79 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 45 with the objective function = 5.06 with Chi Square = 960.01
## df of the model are 18 and the objective function was 0.28
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 195 with the empirical chi square 6.6 with prob < 0.99
## The total n.obs was 195 with Likelihood Chi Square = 53.07 with prob < 2.6e-05
##
## Tucker Lewis Index of factoring reliability = 0.903
## RMSEA index = 0.1 and the 90 % confidence intervals are 0.07 0.132
## BIC = -41.84
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA2 PA1 PA3
## Correlation of (regression) scores with factors 1.00 0.95 0.77
## Multiple R square of scores with factors 1.00 0.90 0.59
## Minimum correlation of possible factor scores 0.99 0.79 0.19
Hasil setelah rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4, X9, dan X10. Ketiga faktor mampu menjelaskan 56% total variansi data. Nilai RMSR sebesar 0,03 dan TLI sebesar 0,903 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,10 mengindikasikan model belum sepenuhnya optimal. Variabel X7 memiliki nilai communalities rendah sehingga kontribusinya terhadap model relatif kecil.
Z_new <- Z[, !colnames(Z) %in% c("X7")]
Variabel X7 dihapus dari model karena tidak memiliki factor loading yang signifikan pada ketiga faktor (seluruh loading < 0,30). Selain itu, nilai communalitiesnya menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X7 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.
r_baru <- cor(Z_new)
KMO(r_baru)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru)
## Overall MSA = 0.71
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X8 X9 X10
## 0.67 0.57 0.69 0.76 0.80 0.79 0.71 0.82 0.80
cortest.bartlett(r_baru)
## Warning in cortest.bartlett(r_baru): n not specified, 100 used
## $chisq
## [1] 472.2902
##
## $p.value
## [1] 1.856749e-77
##
## $df
## [1] 36
fa.parallel(Z_new, fm = "pa", fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
fa_varimax <- fa(Z_new,
nfactors = 3,
rotate = "varimax",
fm = "pa",
scores = TRUE)
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
fa_varimax
## Factor Analysis using method = pa
## Call: fa(r = Z_new, nfactors = 3, rotate = "varimax", scores = TRUE,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA1 PA3 h2 u2 com
## X1 0.06 0.91 0.27 0.90 0.0996 1.2
## X2 1.00 0.06 0.10 1.01 -0.0098 1.0
## X3 0.76 0.36 0.25 0.76 0.2369 1.7
## X4 0.03 0.20 0.69 0.52 0.4848 1.2
## X5 -0.77 -0.17 -0.09 0.62 0.3755 1.1
## X6 -0.19 -0.80 -0.29 0.77 0.2341 1.4
## X8 0.24 0.62 0.09 0.45 0.5510 1.4
## X9 0.19 0.16 0.40 0.22 0.7783 1.8
## X10 -0.08 -0.09 -0.54 0.31 0.6930 1.1
##
## PA2 PA1 PA3
## SS loadings 2.30 2.08 1.18
## Proportion Var 0.26 0.23 0.13
## Cumulative Var 0.26 0.49 0.62
## Proportion Explained 0.41 0.37 0.21
## Cumulative Proportion 0.41 0.79 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 36 with the objective function = 4.96 with Chi Square = 943.75
## df of the model are 12 and the objective function was 0.24
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 195 with the empirical chi square 3.64 with prob < 0.99
## The total n.obs was 195 with Likelihood Chi Square = 44.46 with prob < 1.3e-05
##
## Tucker Lewis Index of factoring reliability = 0.892
## RMSEA index = 0.118 and the 90 % confidence intervals are 0.082 0.156
## BIC = -18.81
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA2 PA1 PA3
## Correlation of (regression) scores with factors 1.00 0.94 0.77
## Multiple R square of scores with factors 1.00 0.89 0.60
## Minimum correlation of possible factor scores 0.99 0.77 0.20
Hasil rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4 dan X10. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,892 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,118 mengindikasikan model belum sepenuhnya optimal. Variabel X9 memiliki nilai communalities rendah sehingga kontribusinya terhadap model relatif kecil.
Z_new3 <- Z_new[, -which(colnames(Z_new)=="X9")]
Variabel X9 dihapus dari model karena memiliki nilai communalities < 0.30 menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X9 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.
r_baru1 <- cor(Z_new3)
KMO(r_baru1)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru1)
## Overall MSA = 0.7
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X8 X10
## 0.67 0.56 0.69 0.77 0.81 0.79 0.71 0.75
cortest.bartlett(r_baru1)
## Warning in cortest.bartlett(r_baru1): n not specified, 100 used
## $chisq
## [1] 453.2787
##
## $p.value
## [1] 2.64483e-78
##
## $df
## [1] 28
fa.parallel(Z_new3, fm = "pa", fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
fa_varimax <- fa(Z_new3,
nfactors = 3,
rotate = "varimax",
fm = "pa",
scores = TRUE)
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
fa_varimax
## Factor Analysis using method = pa
## Call: fa(r = Z_new3, nfactors = 3, rotate = "varimax", scores = TRUE,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA1 PA3 h2 u2 com
## X1 0.07 0.91 0.28 0.91 0.093 1.2
## X2 1.01 0.06 0.08 1.03 -0.027 1.0
## X3 0.76 0.36 0.24 0.77 0.229 1.6
## X4 0.04 0.19 0.74 0.59 0.411 1.1
## X5 -0.76 -0.19 -0.04 0.61 0.386 1.1
## X6 -0.19 -0.81 -0.26 0.76 0.240 1.3
## X8 0.24 0.63 0.05 0.45 0.547 1.3
## X10 -0.10 -0.11 -0.48 0.26 0.744 1.2
##
## PA2 PA1 PA3
## SS loadings 2.29 2.08 1.00
## Proportion Var 0.29 0.26 0.12
## Cumulative Var 0.29 0.55 0.67
## Proportion Explained 0.43 0.39 0.19
## Cumulative Proportion 0.43 0.81 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 28 with the objective function = 4.75 with Chi Square = 904.18
## df of the model are 7 and the objective function was 0.17
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 195 with the empirical chi square 1.91 with prob < 0.96
## The total n.obs was 195 with Likelihood Chi Square = 32.26 with prob < 3.6e-05
##
## Tucker Lewis Index of factoring reliability = 0.883
## RMSEA index = 0.136 and the 90 % confidence intervals are 0.091 0.186
## BIC = -4.65
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA2 PA1 PA3
## Correlation of (regression) scores with factors 1 0.94 0.78
## Multiple R square of scores with factors 1 0.89 0.61
## Minimum correlation of possible factor scores 1 0.78 0.22
Hasil rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4, X9, dan X10. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,883 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,136 mengindikasikan model belum sepenuhnya optimal. Variabel X10 memiliki nilai communalities rendah dan X2 memiliki nilai communalities sangat tinggi sehingga kontribusinya terhadap model kurang baik.
Z_new2 <- Z_new3[, !(colnames(Z_new3) %in% c("X2","X10"))]
Variabel X2 dan X10 dihapus dari model karena tidak memiliki factor loading yang signifikan pada ketiga faktor (seluruh loading < 0,30 dan > 1,00). Selain itu, nilai communalitiesnya menunjukkan bahwa variabel X10 tidak mampu dijelaskan oleh faktor yang terbentuk dan untuk X2 menunjukkan relasi yang sangat kuat. Oleh karena itu, x2 dan X10 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.
r_baru2 <- cor(Z_new2)
KMO(r_baru2)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru2)
## Overall MSA = 0.73
## MSA for each item =
## X1 X3 X4 X5 X6 X8
## 0.72 0.69 0.82 0.60 0.75 0.85
cortest.bartlett(r_baru2)
## Warning in cortest.bartlett(r_baru2): n not specified, 100 used
## $chisq
## [1] 263.1868
##
## $p.value
## [1] 2.368208e-47
##
## $df
## [1] 15
fa.parallel(Z_new2, fm = "pa", fa = "fa")
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
fa_varimax <- fa(Z_new2,
nfactors = 2,
rotate = "varimax",
fm = "pa",
scores = TRUE)
## maximum iteration exceeded
fa_varimax
## Factor Analysis using method = pa
## Call: fa(r = Z_new2, nfactors = 2, rotate = "varimax", scores = TRUE,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## X1 0.94 0.14 0.90 0.095 1.0
## X3 0.40 0.70 0.65 0.350 1.6
## X4 0.38 0.09 0.15 0.846 1.1
## X5 -0.10 -0.89 0.80 0.198 1.0
## X6 -0.84 -0.26 0.77 0.231 1.2
## X8 0.58 0.25 0.40 0.595 1.4
##
## PA1 PA2
## SS loadings 2.24 1.44
## Proportion Var 0.37 0.24
## Cumulative Var 0.37 0.61
## Proportion Explained 0.61 0.39
## Cumulative Proportion 0.61 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 15 with the objective function = 2.74 with Chi Square = 523.18
## df of the model are 4 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 195 with the empirical chi square 3.13 with prob < 0.54
## The total n.obs was 195 with Likelihood Chi Square = 17.2 with prob < 0.0018
##
## Tucker Lewis Index of factoring reliability = 0.902
## RMSEA index = 0.13 and the 90 % confidence intervals are 0.071 0.197
## BIC = -3.9
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.96 0.91
## Multiple R square of scores with factors 0.92 0.83
## Minimum correlation of possible factor scores 0.84 0.67
Hasil rotasi Varimax menunjukkan bahwa terbentuk dua faktor utama karena pada model 3 faktor muncul ultra-heywood pada X2. Faktor pertama terdiri dari variabel X1, X6, dan X8. Faktor kedua terdiri dari X3 dan X5. Kedua faktor mampu menjelaskan 61% total variansi data. Nilai RMSR sebesar 0,03 dan TLI sebesar 0,902 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,13 mengindikasikan model belum sepenuhnya optimal dan varibael X4 memiliki nilai communalities rendah sehingga kontribusinya terhadap model kurang baik.
Z_new4 <- Z_new2[, !colnames(Z_new2) %in% c("X4")]
Variabel X4 dihapus dari model karena memiliki nilai communalities < 0,30 menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X4 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.
r_baru4 <- cor(Z_new4)
KMO(r_baru4)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru4)
## Overall MSA = 0.71
## MSA for each item =
## X1 X3 X5 X6 X8
## 0.70 0.69 0.60 0.72 0.85
cortest.bartlett(r_baru4)
## Warning in cortest.bartlett(r_baru4): n not specified, 100 used
## $chisq
## [1] 244.7261
##
## $p.value
## [1] 6.967857e-47
##
## $df
## [1] 10
fa.parallel(Z_new4, fm = "pa", fa = "fa")
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
fa_varimax <- fa(Z_new4,
nfactors = 2,
rotate = "varimax",
fm = "pa",
scores = TRUE)
## maximum iteration exceeded
fa_varimax
## Factor Analysis using method = pa
## Call: fa(r = Z_new4, nfactors = 2, rotate = "varimax", scores = TRUE,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## X1 0.92 -0.14 0.88 0.12 1.0
## X3 0.39 -0.70 0.64 0.36 1.6
## X5 -0.11 0.89 0.80 0.20 1.0
## X6 -0.84 0.25 0.78 0.22 1.2
## X8 0.61 -0.24 0.43 0.57 1.3
##
## PA1 PA2
## SS loadings 2.10 1.42
## Proportion Var 0.42 0.28
## Cumulative Var 0.42 0.70
## Proportion Explained 0.60 0.40
## Cumulative Proportion 0.60 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 10 with the objective function = 2.54 with Chi Square = 485.65
## df of the model are 1 and the objective function was 0.05
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 195 with the empirical chi square 0.94 with prob < 0.33
## The total n.obs was 195 with Likelihood Chi Square = 8.79 with prob < 0.003
##
## Tucker Lewis Index of factoring reliability = 0.835
## RMSEA index = 0.2 and the 90 % confidence intervals are 0.095 0.331
## BIC = 3.51
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.95 0.91
## Multiple R square of scores with factors 0.90 0.83
## Minimum correlation of possible factor scores 0.81 0.65
Hasil rotasi Varimax menunjukkan bahwa terbentuk dua faktor utama. Faktor pertama terdiri dari variabel X1, X6, dan X8. Faktor kedua terdiri dari X3 dan X5. Kedua faktor mampu menjelaskan 70% total variansi data. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,835 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,20 tergolong tinggi. Secara keseluruhan, model dua faktor dapat dinilai cukup memadai dan stabil dalam merepresentasikan struktur data.
print(fa_varimax$loadings, cutoff = 0.3)
##
## Loadings:
## PA1 PA2
## X1 0.925
## X3 0.388 -0.700
## X5 0.885
## X6 -0.844
## X8 0.608
##
## PA1 PA2
## SS loadings 2.101 1.416
## Proportion Var 0.420 0.283
## Cumulative Var 0.420 0.703
loadings <- as.data.frame(unclass(fa_varimax$loadings))
loadings$Variable <- rownames(loadings)
loadings
## PA1 PA2 Variable
## X1 0.9246307 -0.1434650 X1
## X3 0.3883844 -0.6999034 X3
## X5 -0.1137521 0.8851337 X5
## X6 -0.8442779 0.2519433 X6
## X8 0.6080635 -0.2414183 X8
Hasil rotasi Varimax menunjukkan terbentuk dua faktor utama. Faktor pertama (PA1) dibentuk oleh variabel X1 (0,925), X6 (−0,844), dan X8 (0,608). Faktor kedua (PA2) dibentuk oleh variabel X5 (0,885) dan X3 (−0,700). Kedua faktor mampu menjelaskan 70,3% total variansi data, sehingga model dua faktor dinilai cukup baik dalam merepresentasikan struktur data.
fa_varimax$RMSEA
## RMSEA lower upper confidence
## 0.19977581 0.09473164 0.33092524 0.90000000
fa_varimax$TLI
## [1] 0.8351001
fa_varimax$rms
## [1] 0.02195696
fa_varimax$BIC
## [1] 3.514678
fa_varimax$Vaccounted
## PA1 PA2
## SS loadings 2.1012703 1.4156668
## Proportion Var 0.4202541 0.2831334
## Cumulative Var 0.4202541 0.7033874
## Proportion Explained 0.5974717 0.4025283
## Cumulative Proportion 0.5974717 1.0000000
summary(fa_varimax)
##
## Factor analysis with Call: fa(r = Z_new4, nfactors = 2, rotate = "varimax", scores = TRUE,
## fm = "pa")
##
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 1 and the objective function was 0.05
## The number of observations was 195 with Chi Square = 8.79 with prob < 0.003
##
## The root mean square of the residuals (RMSA) is 0.02
## The df corrected root mean square of the residuals is 0.07
##
## Tucker Lewis Index of factoring reliability = 0.835
## RMSEA index = 0.2 and the 90 % confidence intervals are 0.095 0.331
## BIC = 3.51
fa_scores <- fa_varimax$scores
head(fa_scores)
## PA1 PA2
## [1,] 0.7793723 0.3066403
## [2,] 0.5125727 -0.2359794
## [3,] -1.5738855 2.0589759
## [4,] 0.5097425 0.2029549
## [5,] 0.5936671 -0.3747409
## [6,] 0.5621896 -0.6708985
fa.diagram(fa_varimax$loadings)
loadings <- as.data.frame(unclass(fa_varimax$loadings))
loadings$Variable <- rownames(loadings)
load_melt <- melt(loadings, id.vars = "Variable")
ggplot(load_melt, aes(x = Variable, y = value)) +
geom_bar(stat = "identity") +
facet_wrap(~variable) +
coord_flip() +
ggtitle("Factor Loadings (Varimax Rotation)")