library(psych)
library(dplyr)
library(FactoMineR) 
library(factoextra)
library(GPArotation)
library(reshape2)
library(ggplot2)

Import Data

Dataset yang digunakan dalam penelitian ini adalah Spotify Recommendation Dataset yang diperoleh dari platform Kaggle dan dapat diakses melalui tautan berikut, https://www.kaggle.com/datasets/bricevergnou/spotify-recommendation.

Dimana dataset ini memiliki 13 variabel, yaitu 12 variabel fitur audio seperti acousticness, danceability, energy, tempo, valence, dan lainnya, serta 1 variabel target bernama “liked” yang bersifat biner, yaitu 1 (disukai) dan 0 (tidak disukai).

1. Dataset

# Import Data
data <- read.csv("data.csv")
knitr::kable(head(data))
danceability energy key loudness mode speechiness acousticness instrumentalness liveness valence tempo duration_ms time_signature liked
0.803 0.6240 7 -6.764 0 0.0477 0.4510 7.34e-04 0.1000 0.6280 95.968 304524 4 0
0.762 0.7030 10 -7.951 0 0.3060 0.2060 0.00e+00 0.0912 0.5190 151.329 247178 4 1
0.261 0.0149 1 -27.528 1 0.0419 0.9920 8.97e-01 0.1020 0.0382 75.296 286987 4 0
0.722 0.7360 3 -6.994 0 0.0585 0.4310 1.20e-06 0.1230 0.5820 89.860 208920 4 1
0.787 0.5720 1 -7.516 1 0.2220 0.1450 0.00e+00 0.0753 0.6470 155.117 179413 4 1
0.778 0.6320 8 -6.415 1 0.1250 0.0404 0.00e+00 0.0912 0.8270 140.951 224029 4 1

2. Struktur dan Tipe Data

# Struktur dan Tipe Data
str(data)
## 'data.frame':    195 obs. of  14 variables:
##  $ danceability    : num  0.803 0.762 0.261 0.722 0.787 0.778 0.666 0.922 0.794 0.853 ...
##  $ energy          : num  0.624 0.703 0.0149 0.736 0.572 0.632 0.589 0.712 0.659 0.668 ...
##  $ key             : int  7 10 1 3 1 8 0 7 7 3 ...
##  $ loudness        : num  -6.76 -7.95 -27.53 -6.99 -7.52 ...
##  $ mode            : int  0 0 1 0 1 1 0 1 0 1 ...
##  $ speechiness     : num  0.0477 0.306 0.0419 0.0585 0.222 0.125 0.324 0.171 0.0498 0.447 ...
##  $ acousticness    : num  0.451 0.206 0.992 0.431 0.145 0.0404 0.555 0.0779 0.143 0.263 ...
##  $ instrumentalness: num  7.34e-04 0.00 8.97e-01 1.18e-06 0.00 0.00 0.00 3.96e-05 2.24e-03 0.00 ...
##  $ liveness        : num  0.1 0.0912 0.102 0.123 0.0753 0.0912 0.114 0.175 0.0944 0.104 ...
##  $ valence         : num  0.628 0.519 0.0382 0.582 0.647 0.827 0.776 0.904 0.308 0.745 ...
##  $ tempo           : num  96 151.3 75.3 89.9 155.1 ...
##  $ duration_ms     : int  304524 247178 286987 208920 179413 224029 146053 161800 247460 165363 ...
##  $ time_signature  : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ liked           : int  0 1 0 1 1 1 1 1 0 1 ...

Preprocessing

1. EDA

Untuk mengetahui distribusi data, sehingga dapat dilakukan analisis lanjutan

summary(data)
##   danceability        energy            key            loudness      
##  Min.   :0.1300   Min.   :0.0024   Min.   : 0.000   Min.   :-42.261  
##  1st Qu.:0.4625   1st Qu.:0.5335   1st Qu.: 2.000   1st Qu.: -9.962  
##  Median :0.7050   Median :0.6590   Median : 6.000   Median : -7.766  
##  Mean   :0.6367   Mean   :0.6384   Mean   : 5.497   Mean   : -9.482  
##  3rd Qu.:0.7990   3rd Qu.:0.8375   3rd Qu.: 8.000   3rd Qu.: -5.829  
##  Max.   :0.9460   Max.   :0.9960   Max.   :11.000   Max.   : -2.336  
##       mode         speechiness      acousticness       instrumentalness   
##  Min.   :0.0000   Min.   :0.0278   Min.   :3.050e-06   Min.   :0.0000000  
##  1st Qu.:0.0000   1st Qu.:0.0568   1st Qu.:4.220e-02   1st Qu.:0.0000000  
##  Median :1.0000   Median :0.0962   Median :2.130e-01   Median :0.0000076  
##  Mean   :0.5385   Mean   :0.1490   Mean   :3.191e-01   Mean   :0.1923373  
##  3rd Qu.:1.0000   3rd Qu.:0.2305   3rd Qu.:5.040e-01   3rd Qu.:0.0975000  
##  Max.   :1.0000   Max.   :0.5400   Max.   :9.950e-01   Max.   :0.9690000  
##     liveness         valence           tempo         duration_ms    
##  Min.   :0.0331   Min.   :0.0353   Min.   : 60.17   Min.   : 77203  
##  1st Qu.:0.0840   1st Qu.:0.2690   1st Qu.:100.24   1st Qu.:178301  
##  Median :0.1050   Median :0.5250   Median :124.90   Median :204000  
##  Mean   :0.1485   Mean   :0.4936   Mean   :121.09   Mean   :213409  
##  3rd Qu.:0.1770   3rd Qu.:0.7175   3rd Qu.:142.46   3rd Qu.:242374  
##  Max.   :0.6330   Max.   :0.9800   Max.   :180.04   Max.   :655213  
##  time_signature      liked       
##  Min.   :1.000   Min.   :0.0000  
##  1st Qu.:4.000   1st Qu.:0.0000  
##  Median :4.000   Median :1.0000  
##  Mean   :3.913   Mean   :0.5128  
##  3rd Qu.:4.000   3rd Qu.:1.0000  
##  Max.   :5.000   Max.   :1.0000

Visualisasi Distribusi

par(mar = c(8,4,4,2))
boxplot(data[, c("danceability","energy","key","loudness","mode","speechiness",
                 "acousticness","instrumentalness",
                 "liveness","valence","tempo","duration_ms","time_signature","liked")],
        col="lightblue",
        las=2,
        main="Boxplot Variabel")

2. Hapus Missing Value

sum(is.na(data))
## [1] 0

3. Feature Selection

kolom_dihapus <- c(
  "key",
  "mode",
  "time_signature",
  "liked"
)

data_clean <- data[, !(names(data) %in% kolom_dihapus)]
knitr::kable(head(data_clean))
danceability energy loudness speechiness acousticness instrumentalness liveness valence tempo duration_ms
0.803 0.6240 -6.764 0.0477 0.4510 7.34e-04 0.1000 0.6280 95.968 304524
0.762 0.7030 -7.951 0.3060 0.2060 0.00e+00 0.0912 0.5190 151.329 247178
0.261 0.0149 -27.528 0.0419 0.9920 8.97e-01 0.1020 0.0382 75.296 286987
0.722 0.7360 -6.994 0.0585 0.4310 1.20e-06 0.1230 0.5820 89.860 208920
0.787 0.5720 -7.516 0.2220 0.1450 0.00e+00 0.0753 0.6470 155.117 179413
0.778 0.6320 -6.415 0.1250 0.0404 0.00e+00 0.0912 0.8270 140.951 224029

4. Mengganti Nama Fitur

colnames(data_clean) <- paste0("X", 1:ncol(data_clean))
knitr::kable(head(data_clean))
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
0.803 0.6240 -6.764 0.0477 0.4510 7.34e-04 0.1000 0.6280 95.968 304524
0.762 0.7030 -7.951 0.3060 0.2060 0.00e+00 0.0912 0.5190 151.329 247178
0.261 0.0149 -27.528 0.0419 0.9920 8.97e-01 0.1020 0.0382 75.296 286987
0.722 0.7360 -6.994 0.0585 0.4310 1.20e-06 0.1230 0.5820 89.860 208920
0.787 0.5720 -7.516 0.2220 0.1450 0.00e+00 0.0753 0.6470 155.117 179413
0.778 0.6320 -6.415 0.1250 0.0404 0.00e+00 0.0912 0.8270 140.951 224029

Assumptions

1. Corrrelation

Masing-masing variabel memiliki korelasi dan tidak bernilai 0

cor(data_clean)
##             X1         X2          X3          X4          X5          X6
## X1   1.0000000  0.1371879  0.45507806  0.38859635 -0.23417647 -0.80705280
## X2   0.1371879  1.0000000  0.81356741  0.12282548 -0.77258324 -0.24144369
## X3   0.4550781  0.8135674  1.00000000  0.27971003 -0.66498947 -0.53826636
## X4   0.3885964  0.1228255  0.27971003  1.00000000 -0.07971033 -0.34324228
## X5  -0.2341765 -0.7725832 -0.66498947 -0.07971033  1.00000000  0.29432036
## X6  -0.8070528 -0.2414437 -0.53826636 -0.34324228  0.29432036  1.00000000
## X7  -0.1370687  0.1665082  0.07809341 -0.00666515 -0.14098782  0.05573045
## X8   0.6123443  0.3194087  0.36353233  0.18070753 -0.31380646 -0.57222362
## X9   0.2235217  0.2149051  0.27446222  0.31391823 -0.25509744 -0.29949290
## X10 -0.2326207 -0.1345274 -0.20633365 -0.38839658  0.13879339  0.24968279
##              X7          X8          X9        X10
## X1  -0.13706866  0.61234430  0.22352170 -0.2326207
## X2   0.16650819  0.31940867  0.21490513 -0.1345274
## X3   0.07809341  0.36353233  0.27446222 -0.2063337
## X4  -0.00666515  0.18070753  0.31391823 -0.3883966
## X5  -0.14098782 -0.31380646 -0.25509744  0.1387934
## X6   0.05573045 -0.57222362 -0.29949290  0.2496828
## X7   1.00000000 -0.01300429 -0.01055516 -0.1439661
## X8  -0.01300429  1.00000000  0.21801724 -0.1148418
## X9  -0.01055516  0.21801724  1.00000000 -0.2562498
## X10 -0.14396607 -0.11484180 -0.25624981  1.0000000
corrplot::corrplot(cor(data_clean), tl.col = "black", tl.srt = 45, tl.cex = 0.5)

2. MSA

KMO/MSA memilki nilai > 0.5

r <- cor(data_clean)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA =  0.71
## MSA for each item = 
##   X1   X2   X3   X4   X5   X6   X7   X8   X9  X10 
## 0.67 0.58 0.70 0.76 0.80 0.79 0.60 0.71 0.81 0.77

3. Bartlett test

Dengan syarat hasil p-value < 0.05

bartlett.test(data_clean)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data_clean
## Bartlett's K-squared = 36677, df = 9, p-value < 2.2e-16

Principal Component Analysis

Principal Component Analysis (PCA) merupakan teknik reduksi dimensi yang melalukan transformasi variabel asli menjadi sejumlah komponen utama baru yang tidak berkorelasi

1. Standarisasi

Data diubah agar memiliki skala yang sama, sehingga semua variabel memiliki kontribusi yang seimbang

Z <- scale(data_clean)

2. PCA

Melakukan PCA untuk mendapatkan hasil loading dan eigen value, sehingga walaupun komponen yang dihasilkan sedikit tetapi informasi yang dimiliki tidak hilang

pca_full <- principal(Z,
                      nfactors = ncol(Z),
                      rotate = "none")
pca_full
## Principal Components Analysis
## Call: principal(r = Z, nfactors = ncol(Z), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10 h2       u2 com
## X1   0.73 -0.51 -0.23  0.18 -0.05  0.02 -0.15 -0.12  0.28  0.08  1  0.0e+00 2.8
## X2   0.69  0.65 -0.05 -0.12 -0.11  0.02  0.11  0.17 -0.02  0.20  1  2.2e-15 2.5
## X3   0.84  0.34 -0.08 -0.09 -0.19  0.09 -0.18  0.24  0.09 -0.17  1  6.7e-16 2.0
## X4   0.48 -0.39  0.49 -0.15 -0.27  0.48  0.23 -0.07 -0.05 -0.01  1  1.1e-16 5.2
## X5  -0.68 -0.57  0.08  0.11  0.01  0.09 -0.03  0.43  0.04  0.05  1  1.2e-15 2.8
## X6  -0.78  0.39  0.19 -0.17 -0.02  0.00  0.31 -0.01  0.29 -0.03  1 -1.3e-15 2.5
## X7   0.06  0.41  0.45  0.71  0.26  0.20 -0.08 -0.01  0.03  0.00  1  8.9e-16 3.0
## X8   0.66 -0.22 -0.34  0.27  0.25 -0.11  0.49  0.09 -0.03 -0.06  1 -8.9e-16 3.7
## X9   0.48 -0.11  0.35 -0.46  0.65  0.00 -0.08  0.04  0.04  0.01  1  6.7e-16 3.5
## X10 -0.40  0.18 -0.68 -0.08  0.23  0.53 -0.03 -0.03  0.00  0.00  1  2.2e-16 3.1
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8  PC9 PC10
## SS loadings           3.81 1.69 1.25 0.91 0.73 0.59 0.47 0.30 0.17 0.08
## Proportion Var        0.38 0.17 0.12 0.09 0.07 0.06 0.05 0.03 0.02 0.01
## Cumulative Var        0.38 0.55 0.68 0.77 0.84 0.90 0.94 0.97 0.99 1.00
## Proportion Explained  0.38 0.17 0.12 0.09 0.07 0.06 0.05 0.03 0.02 0.01
## Cumulative Proportion 0.38 0.55 0.68 0.77 0.84 0.90 0.94 0.97 0.99 1.00
## 
## Mean item complexity =  3.1
## Test of the hypothesis that 10 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1

3. Scree Plot

Untuk menentukan komponen PCA yang paling memiliki pengaruh

pca_vis <- prcomp(Z, scale. = FALSE)

fviz_eig(pca_vis,
         addlabels = TRUE,
         barfill = "skyblue",
         barcolor = "darkblue",
         linecolor = "red")
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

4. Mengambil 4 Komponen

pca_result <- principal(Z,
                        nfactors = 4,
                        rotate = "none")

pca_result
## Principal Components Analysis
## Call: principal(r = Z, nfactors = 4, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PC1   PC2   PC3   PC4   h2    u2 com
## X1   0.73 -0.51 -0.23  0.18 0.88 0.123 2.2
## X2   0.69  0.65 -0.05 -0.12 0.91 0.093 2.1
## X3   0.84  0.34 -0.08 -0.09 0.82 0.175 1.4
## X4   0.48 -0.39  0.49 -0.15 0.63 0.366 3.1
## X5  -0.68 -0.57  0.08  0.11 0.80 0.197 2.0
## X6  -0.78  0.39  0.19 -0.17 0.82 0.179 1.7
## X7   0.06  0.41  0.45  0.71 0.88 0.116 2.4
## X8   0.66 -0.22 -0.34  0.27 0.68 0.323 2.2
## X9   0.48 -0.11  0.35 -0.46 0.57 0.428 3.0
## X10 -0.40  0.18 -0.68 -0.08 0.66 0.338 1.8
## 
##                        PC1  PC2  PC3  PC4
## SS loadings           3.81 1.69 1.25 0.91
## Proportion Var        0.38 0.17 0.12 0.09
## Cumulative Var        0.38 0.55 0.68 0.77
## Proportion Explained  0.50 0.22 0.16 0.12
## Cumulative Proportion 0.50 0.72 0.88 1.00
## 
## Mean item complexity =  2.2
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.05 
##  with the empirical chi square  52.99  with prob <  1.8e-07 
## 
## Fit based upon off diagonal values = 0.98

5. Menghitung Eigen Vector dari Loading

Fungsi principal() memberikan loading dan bukan eigenvector, sehingga untuk mendapatkan hasil final skor PCA dibutuhkan eigenvector agar sesuai dengan rumus aslinya

L <- as.matrix(pca_result$loadings)
lambda <- pca_result$values[1:4]

V <- sweep(L, 2, sqrt(lambda), "/")
V
## 
## Loadings:
##     PC1    PC2    PC3    PC4   
## X1   0.372 -0.395 -0.209  0.187
## X2   0.351  0.498        -0.126
## X3   0.428  0.259              
## X4   0.245 -0.296  0.435 -0.157
## X5  -0.349 -0.436         0.113
## X6  -0.397  0.304  0.167 -0.178
## X7          0.319  0.405  0.743
## X8   0.339 -0.171 -0.305  0.286
## X9   0.244         0.313 -0.480
## X10 -0.203  0.142 -0.610       
## 
##                PC1 PC2 PC3 PC4
## SS loadings    1.0 1.0 1.0 1.0
## Proportion Var 0.1 0.1 0.1 0.1
## Cumulative Var 0.1 0.2 0.3 0.4

6. Menghitung Skor PCA

Skor menunjukkan posisi setiap objek pada komponen utama, dimana ketika memiliki nilai tinggi maka objek punya karakteristik tertentu

scores <- Z %*% V
head(scores)
##               PC1         PC2         PC3        PC4
## [1,] -0.003877716 -0.29701663 -2.00275192  0.5130620
## [2,]  1.247996752 -0.66125321  0.10228409 -1.0215715
## [3,] -5.621553101 -0.82116628 -0.01689206  0.1259920
## [4,]  0.202192105 -0.01384421 -1.02648135  0.7033173
## [5,]  1.418859682 -0.92545428  0.18695078 -0.8128683
## [6,]  1.457208404 -0.30579038 -0.88004258 -0.2837756

Visualisasi Principal Component Analysis

1. Biplot PCA

fviz_pca_biplot(pca_vis,
                geom.ind = "point",
                addEllipses = TRUE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Interpretasi :

  • Pada biplot PCA ini terdapat 2 komponen utama yaitu PC1 (38,1%) yang positif kearah kanan dan negatif kearah kiri, serta PC2 (16,9) yang positif kearah atas dan negatif kearah bawah
  • Semakin panjang panah maka semakin berpengaruh variabel tersebut pada komponen utama atau PC1 PC2
  • Pada PC1, variabel X5 dan X6 berkontribusi ke arah positif dan variabel X1, X2, X3 cenderung berpengaruh ke arah negatif
  • Pada PC2, variabel X2 dan X6 cenderung kontribusi kearah positif dan variabel X1, X5 cenderung berpengaruh ke arah negatif

2. Contribution Plot

p1 <- fviz_contrib(pca_vis, choice = "var", axes = 1, top = 5) + ggtitle("PC1")

p2 <- fviz_contrib(pca_vis, choice = "var", axes = 2, top = 5) + ggtitle("PC2")

p3 <- fviz_contrib(pca_vis, choice = "var", axes = 3, top = 5) + ggtitle("PC3")

p4 <- fviz_contrib(pca_vis, choice = "var", axes = 4, top = 5) + ggtitle("PC4")

gridExtra::grid.arrange(p1, p2, p3, p4, nrow = 1)

Interpretasi :

  • Berbeda dengan biplot, contribution plot hanya melihat secara garis besar pengaruh masing-masing variabel pada komponen utama (PC1, PC2, PC3, PC4)
  • Terlihat dari visualisasi tersebut, PC1 sangat dipengaruhi oleh variabel X3, PC2 sangat dipengaruhi oleh variabel X2, PC3 sangat dipengaruhi variabel X10, dan PC4 sangat dipengaruhi variabel X7

Factor Analysis

Factor Analysis (FA) adalah teknik statistik untuk Mencari faktor laten (tersembunyi) yang menyebabkan variabel-variabel saling berkorelasi.

1. Scree Plot Penentuan Jumlah Faktor

Menentukan jumlah faktor yang optimal dengan membandingkan eigenvalue pada data aktual, data simulasi acak, dan data hasil resampling.

fa.parallel(Z, fm = "pa", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

Hasilnya menunjukkan jumlah faktor yang layak dipertahankan dalam analisis adalah 3 faktor, karena hanya tiga faktor tersebut yang memiliki eigenvalue lebih besar dari eigenvalue data simulasi.

2. Ekstraksi Faktor

fa_pa <- fa(Z,
            nfactors = 3,
            rotate = "none",
            fm = "pa")

fa_pa
## Factor Analysis using method =  pa
## Call: fa(r = Z, nfactors = 3, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PA1   PA2   PA3    h2      u2 com
## X1   0.74 -0.58 -0.21 0.925 0.07452 2.1
## X2   0.73  0.69 -0.03 0.999 0.00084 2.0
## X3   0.82  0.29 -0.02 0.760 0.23951 1.2
## X4   0.41 -0.27  0.44 0.444 0.55623 2.7
## X5  -0.65 -0.46  0.06 0.632 0.36778 1.8
## X6  -0.75  0.41  0.13 0.754 0.24596 1.6
## X7   0.05  0.21  0.16 0.072 0.92841 2.0
## X8   0.59 -0.20 -0.22 0.436 0.56399 1.5
## X9   0.39 -0.06  0.24 0.214 0.78589 1.7
## X10 -0.34  0.13 -0.51 0.387 0.61339 1.9
## 
##                        PA1  PA2  PA3
## SS loadings           3.52 1.45 0.65
## Proportion Var        0.35 0.15 0.07
## Cumulative Var        0.35 0.50 0.56
## Proportion Explained  0.63 0.26 0.12
## Cumulative Proportion 0.63 0.88 1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.06 with Chi Square =  960.01
## df of  the model are 18  and the objective function was  0.28 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  195 with the empirical chi square  6.6  with prob <  0.99 
## The total n.obs was  195  with Likelihood Chi Square =  53.07  with prob <  2.6e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.903
## RMSEA index =  0.1  and the 90 % confidence intervals are  0.07 0.132
## BIC =  -41.84
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA2  PA3
## Correlation of (regression) scores with factors   0.98 0.98 0.75
## Multiple R square of scores with factors          0.97 0.97 0.56
## Minimum correlation of possible factor scores     0.93 0.93 0.11

Pada tahap ekstraksi tanpa rotasi menunjukkan bahwa dua faktor utama memiliki eigenvalue > 1, sedangkan faktor ketiga memiliki eigenvalue < 1 sehingga kontribusinya relatif kecil. Ketiga faktor mampu menjelaskan 56% total variansi data. Nilai RMSR sebesar 0,03 menunjukkan model cukup baik, namun nilai RMSEA sebesar 0,10 mengindikasikan bahwa model masih kurang optimal. Selain itu, beberapa variabel seperti X7 dan X9 memiliki nilai communalities yang rendah sehingga kurang representatif dalam pembentukan faktor.

3. Rotasi Varimax

fa_varimax <- fa(Z,
                 nfactors = 3,
                 rotate = "varimax",
                 fm = "pa",
                 scores = TRUE)

fa_varimax
## Factor Analysis using method =  pa
## Call: fa(r = Z, nfactors = 3, rotate = "varimax", scores = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PA2   PA1   PA3    h2      u2 com
## X1   0.06  0.92  0.28 0.925 0.07452 1.2
## X2   0.99  0.07  0.08 0.999 0.00084 1.0
## X3   0.75  0.36  0.24 0.760 0.23951 1.7
## X4   0.03  0.21  0.63 0.444 0.55623 1.2
## X5  -0.77 -0.17 -0.09 0.632 0.36778 1.1
## X6  -0.19 -0.79 -0.31 0.754 0.24596 1.4
## X7   0.18 -0.17  0.10 0.072 0.92841 2.5
## X8   0.25  0.60  0.11 0.436 0.56399 1.4
## X9   0.19  0.16  0.39 0.214 0.78589 1.8
## X10 -0.09 -0.04 -0.61 0.387 0.61339 1.0
## 
##                        PA2  PA1  PA3
## SS loadings           2.33 2.10 1.20
## Proportion Var        0.23 0.21 0.12
## Cumulative Var        0.23 0.44 0.56
## Proportion Explained  0.41 0.37 0.21
## Cumulative Proportion 0.41 0.79 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.06 with Chi Square =  960.01
## df of  the model are 18  and the objective function was  0.28 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  195 with the empirical chi square  6.6  with prob <  0.99 
## The total n.obs was  195  with Likelihood Chi Square =  53.07  with prob <  2.6e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.903
## RMSEA index =  0.1  and the 90 % confidence intervals are  0.07 0.132
## BIC =  -41.84
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA2  PA1  PA3
## Correlation of (regression) scores with factors   1.00 0.95 0.77
## Multiple R square of scores with factors          1.00 0.90 0.59
## Minimum correlation of possible factor scores     0.99 0.79 0.19

Hasil setelah rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4, X9, dan X10. Ketiga faktor mampu menjelaskan 56% total variansi data. Nilai RMSR sebesar 0,03 dan TLI sebesar 0,903 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,10 mengindikasikan model belum sepenuhnya optimal. Variabel X7 memiliki nilai communalities rendah sehingga kontribusinya terhadap model relatif kecil.

4. Perbaikan FA Pertama

Z_new <- Z[, !colnames(Z) %in% c("X7")]

Variabel X7 dihapus dari model karena tidak memiliki factor loading yang signifikan pada ketiga faktor (seluruh loading < 0,30). Selain itu, nilai communalitiesnya menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X7 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.

- Asumsi Ulang

  • MSA
r_baru <- cor(Z_new)
KMO(r_baru)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru)
## Overall MSA =  0.71
## MSA for each item = 
##   X1   X2   X3   X4   X5   X6   X8   X9  X10 
## 0.67 0.57 0.69 0.76 0.80 0.79 0.71 0.82 0.80
  • Uji Bartlett
cortest.bartlett(r_baru)
## Warning in cortest.bartlett(r_baru): n not specified, 100 used
## $chisq
## [1] 472.2902
## 
## $p.value
## [1] 1.856749e-77
## 
## $df
## [1] 36

Scree Plot Penentuan Jumlah Faktor Baru

fa.parallel(Z_new, fm = "pa", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

Rotasi Varimax Baru

fa_varimax <- fa(Z_new,
                 nfactors = 3,
                 rotate = "varimax",
                 fm = "pa",
                 scores = TRUE)
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
fa_varimax
## Factor Analysis using method =  pa
## Call: fa(r = Z_new, nfactors = 3, rotate = "varimax", scores = TRUE, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PA2   PA1   PA3   h2      u2 com
## X1   0.06  0.91  0.27 0.90  0.0996 1.2
## X2   1.00  0.06  0.10 1.01 -0.0098 1.0
## X3   0.76  0.36  0.25 0.76  0.2369 1.7
## X4   0.03  0.20  0.69 0.52  0.4848 1.2
## X5  -0.77 -0.17 -0.09 0.62  0.3755 1.1
## X6  -0.19 -0.80 -0.29 0.77  0.2341 1.4
## X8   0.24  0.62  0.09 0.45  0.5510 1.4
## X9   0.19  0.16  0.40 0.22  0.7783 1.8
## X10 -0.08 -0.09 -0.54 0.31  0.6930 1.1
## 
##                        PA2  PA1  PA3
## SS loadings           2.30 2.08 1.18
## Proportion Var        0.26 0.23 0.13
## Cumulative Var        0.26 0.49 0.62
## Proportion Explained  0.41 0.37 0.21
## Cumulative Proportion 0.41 0.79 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  36  with the objective function =  4.96 with Chi Square =  943.75
## df of  the model are 12  and the objective function was  0.24 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  195 with the empirical chi square  3.64  with prob <  0.99 
## The total n.obs was  195  with Likelihood Chi Square =  44.46  with prob <  1.3e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.892
## RMSEA index =  0.118  and the 90 % confidence intervals are  0.082 0.156
## BIC =  -18.81
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA2  PA1  PA3
## Correlation of (regression) scores with factors   1.00 0.94 0.77
## Multiple R square of scores with factors          1.00 0.89 0.60
## Minimum correlation of possible factor scores     0.99 0.77 0.20

Hasil rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4 dan X10. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,892 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,118 mengindikasikan model belum sepenuhnya optimal. Variabel X9 memiliki nilai communalities rendah sehingga kontribusinya terhadap model relatif kecil.

5. Perbaikan FA Kedua

Z_new3 <- Z_new[, -which(colnames(Z_new)=="X9")]

Variabel X9 dihapus dari model karena memiliki nilai communalities < 0.30 menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X9 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.

- Asumsi Ulang

  • MSA
r_baru1 <- cor(Z_new3)
KMO(r_baru1)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru1)
## Overall MSA =  0.7
## MSA for each item = 
##   X1   X2   X3   X4   X5   X6   X8  X10 
## 0.67 0.56 0.69 0.77 0.81 0.79 0.71 0.75
  • Uji Bartlett
cortest.bartlett(r_baru1)
## Warning in cortest.bartlett(r_baru1): n not specified, 100 used
## $chisq
## [1] 453.2787
## 
## $p.value
## [1] 2.64483e-78
## 
## $df
## [1] 28

- Scree plot penentuan jumlah faktor baru

fa.parallel(Z_new3, fm = "pa", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

- Rotasi Varimax Baru

fa_varimax <- fa(Z_new3,
                 nfactors = 3,
                 rotate = "varimax",
                 fm = "pa",
                 scores = TRUE)
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
fa_varimax
## Factor Analysis using method =  pa
## Call: fa(r = Z_new3, nfactors = 3, rotate = "varimax", scores = TRUE, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PA2   PA1   PA3   h2     u2 com
## X1   0.07  0.91  0.28 0.91  0.093 1.2
## X2   1.01  0.06  0.08 1.03 -0.027 1.0
## X3   0.76  0.36  0.24 0.77  0.229 1.6
## X4   0.04  0.19  0.74 0.59  0.411 1.1
## X5  -0.76 -0.19 -0.04 0.61  0.386 1.1
## X6  -0.19 -0.81 -0.26 0.76  0.240 1.3
## X8   0.24  0.63  0.05 0.45  0.547 1.3
## X10 -0.10 -0.11 -0.48 0.26  0.744 1.2
## 
##                        PA2  PA1  PA3
## SS loadings           2.29 2.08 1.00
## Proportion Var        0.29 0.26 0.12
## Cumulative Var        0.29 0.55 0.67
## Proportion Explained  0.43 0.39 0.19
## Cumulative Proportion 0.43 0.81 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  28  with the objective function =  4.75 with Chi Square =  904.18
## df of  the model are 7  and the objective function was  0.17 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  195 with the empirical chi square  1.91  with prob <  0.96 
## The total n.obs was  195  with Likelihood Chi Square =  32.26  with prob <  3.6e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.883
## RMSEA index =  0.136  and the 90 % confidence intervals are  0.091 0.186
## BIC =  -4.65
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                   PA2  PA1  PA3
## Correlation of (regression) scores with factors     1 0.94 0.78
## Multiple R square of scores with factors            1 0.89 0.61
## Minimum correlation of possible factor scores       1 0.78 0.22

Hasil rotasi Varimax menunjukkan bahwa terbentuk tiga faktor utama. Faktor pertama terdiri dari variabel X2, X3, X5, dan X6. Faktor kedua terdiri dari X1 dan X8. Faktor ketiga terdiri dari X4, X9, dan X10. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,883 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,136 mengindikasikan model belum sepenuhnya optimal. Variabel X10 memiliki nilai communalities rendah dan X2 memiliki nilai communalities sangat tinggi sehingga kontribusinya terhadap model kurang baik.

6. Perbaikan FA Ketiga

Z_new2 <- Z_new3[, !(colnames(Z_new3) %in% c("X2","X10"))]

Variabel X2 dan X10 dihapus dari model karena tidak memiliki factor loading yang signifikan pada ketiga faktor (seluruh loading < 0,30 dan > 1,00). Selain itu, nilai communalitiesnya menunjukkan bahwa variabel X10 tidak mampu dijelaskan oleh faktor yang terbentuk dan untuk X2 menunjukkan relasi yang sangat kuat. Oleh karena itu, x2 dan X10 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.

- Asumsi Ulang

  • MSA
r_baru2 <- cor(Z_new2)
KMO(r_baru2)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru2)
## Overall MSA =  0.73
## MSA for each item = 
##   X1   X3   X4   X5   X6   X8 
## 0.72 0.69 0.82 0.60 0.75 0.85
  • Uji Bartlett
cortest.bartlett(r_baru2)
## Warning in cortest.bartlett(r_baru2): n not specified, 100 used
## $chisq
## [1] 263.1868
## 
## $p.value
## [1] 2.368208e-47
## 
## $df
## [1] 15

- Scree plot penentuan jumlah faktor baru

fa.parallel(Z_new2, fm = "pa", fa = "fa")

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

- Rotasi Varimax Baru

fa_varimax <- fa(Z_new2,
                 nfactors = 2,
                 rotate = "varimax",
                 fm = "pa",
                 scores = TRUE)
## maximum iteration exceeded
fa_varimax
## Factor Analysis using method =  pa
## Call: fa(r = Z_new2, nfactors = 2, rotate = "varimax", scores = TRUE, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1   PA2   h2    u2 com
## X1  0.94  0.14 0.90 0.095 1.0
## X3  0.40  0.70 0.65 0.350 1.6
## X4  0.38  0.09 0.15 0.846 1.1
## X5 -0.10 -0.89 0.80 0.198 1.0
## X6 -0.84 -0.26 0.77 0.231 1.2
## X8  0.58  0.25 0.40 0.595 1.4
## 
##                        PA1  PA2
## SS loadings           2.24 1.44
## Proportion Var        0.37 0.24
## Cumulative Var        0.37 0.61
## Proportion Explained  0.61 0.39
## Cumulative Proportion 0.61 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  15  with the objective function =  2.74 with Chi Square =  523.18
## df of  the model are 4  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic n.obs is  195 with the empirical chi square  3.13  with prob <  0.54 
## The total n.obs was  195  with Likelihood Chi Square =  17.2  with prob <  0.0018 
## 
## Tucker Lewis Index of factoring reliability =  0.902
## RMSEA index =  0.13  and the 90 % confidence intervals are  0.071 0.197
## BIC =  -3.9
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.96 0.91
## Multiple R square of scores with factors          0.92 0.83
## Minimum correlation of possible factor scores     0.84 0.67

Hasil rotasi Varimax menunjukkan bahwa terbentuk dua faktor utama karena pada model 3 faktor muncul ultra-heywood pada X2. Faktor pertama terdiri dari variabel X1, X6, dan X8. Faktor kedua terdiri dari X3 dan X5. Kedua faktor mampu menjelaskan 61% total variansi data. Nilai RMSR sebesar 0,03 dan TLI sebesar 0,902 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,13 mengindikasikan model belum sepenuhnya optimal dan varibael X4 memiliki nilai communalities rendah sehingga kontribusinya terhadap model kurang baik.

7. Perbaikan FA Keempat

Z_new4 <- Z_new2[, !colnames(Z_new2) %in% c("X4")]

Variabel X4 dihapus dari model karena memiliki nilai communalities < 0,30 menunjukkan bahwa variabel tersebut tidak mampu dijelaskan oleh faktor yang terbentuk. Oleh karena itu, X4 dianggap tidak representatif dalam struktur faktor dan dikeluarkan dari analisis untuk meningkatkan kualitas model.

- Asumsi Ulang

  • MSA
r_baru4 <- cor(Z_new4)
KMO(r_baru4)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r_baru4)
## Overall MSA =  0.71
## MSA for each item = 
##   X1   X3   X5   X6   X8 
## 0.70 0.69 0.60 0.72 0.85
  • Uji Bartlett
cortest.bartlett(r_baru4)
## Warning in cortest.bartlett(r_baru4): n not specified, 100 used
## $chisq
## [1] 244.7261
## 
## $p.value
## [1] 6.967857e-47
## 
## $df
## [1] 10

- Scree plot penentuan jumlah faktor baru

fa.parallel(Z_new4, fm = "pa", fa = "fa")

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

- Rotasi Varimax Baru

fa_varimax <- fa(Z_new4,
                 nfactors = 2,
                 rotate = "varimax",
                 fm = "pa",
                 scores = TRUE)
## maximum iteration exceeded
fa_varimax
## Factor Analysis using method =  pa
## Call: fa(r = Z_new4, nfactors = 2, rotate = "varimax", scores = TRUE, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1   PA2   h2   u2 com
## X1  0.92 -0.14 0.88 0.12 1.0
## X3  0.39 -0.70 0.64 0.36 1.6
## X5 -0.11  0.89 0.80 0.20 1.0
## X6 -0.84  0.25 0.78 0.22 1.2
## X8  0.61 -0.24 0.43 0.57 1.3
## 
##                        PA1  PA2
## SS loadings           2.10 1.42
## Proportion Var        0.42 0.28
## Cumulative Var        0.42 0.70
## Proportion Explained  0.60 0.40
## Cumulative Proportion 0.60 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  10  with the objective function =  2.54 with Chi Square =  485.65
## df of  the model are 1  and the objective function was  0.05 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic n.obs is  195 with the empirical chi square  0.94  with prob <  0.33 
## The total n.obs was  195  with Likelihood Chi Square =  8.79  with prob <  0.003 
## 
## Tucker Lewis Index of factoring reliability =  0.835
## RMSEA index =  0.2  and the 90 % confidence intervals are  0.095 0.331
## BIC =  3.51
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.95 0.91
## Multiple R square of scores with factors          0.90 0.83
## Minimum correlation of possible factor scores     0.81 0.65

Hasil rotasi Varimax menunjukkan bahwa terbentuk dua faktor utama. Faktor pertama terdiri dari variabel X1, X6, dan X8. Faktor kedua terdiri dari X3 dan X5. Kedua faktor mampu menjelaskan 70% total variansi data. Nilai RMSR sebesar 0,02 dan TLI sebesar 0,835 menunjukkan model cukup baik, meskipun nilai RMSEA sebesar 0,20 tergolong tinggi. Secara keseluruhan, model dua faktor dapat dinilai cukup memadai dan stabil dalam merepresentasikan struktur data.

8. Interpretasi Factor Loading

print(fa_varimax$loadings, cutoff = 0.3)
## 
## Loadings:
##    PA1    PA2   
## X1  0.925       
## X3  0.388 -0.700
## X5         0.885
## X6 -0.844       
## X8  0.608       
## 
##                  PA1   PA2
## SS loadings    2.101 1.416
## Proportion Var 0.420 0.283
## Cumulative Var 0.420 0.703
loadings <- as.data.frame(unclass(fa_varimax$loadings))
loadings$Variable <- rownames(loadings)
loadings
##           PA1        PA2 Variable
## X1  0.9246307 -0.1434650       X1
## X3  0.3883844 -0.6999034       X3
## X5 -0.1137521  0.8851337       X5
## X6 -0.8442779  0.2519433       X6
## X8  0.6080635 -0.2414183       X8

Hasil rotasi Varimax menunjukkan terbentuk dua faktor utama. Faktor pertama (PA1) dibentuk oleh variabel X1 (0,925), X6 (−0,844), dan X8 (0,608). Faktor kedua (PA2) dibentuk oleh variabel X5 (0,885) dan X3 (−0,700). Kedua faktor mampu menjelaskan 70,3% total variansi data, sehingga model dua faktor dinilai cukup baik dalam merepresentasikan struktur data.

9. Evaluasi Model

fa_varimax$RMSEA
##      RMSEA      lower      upper confidence 
## 0.19977581 0.09473164 0.33092524 0.90000000
fa_varimax$TLI
## [1] 0.8351001
fa_varimax$rms
## [1] 0.02195696
fa_varimax$BIC
## [1] 3.514678
fa_varimax$Vaccounted
##                             PA1       PA2
## SS loadings           2.1012703 1.4156668
## Proportion Var        0.4202541 0.2831334
## Cumulative Var        0.4202541 0.7033874
## Proportion Explained  0.5974717 0.4025283
## Cumulative Proportion 0.5974717 1.0000000
summary(fa_varimax)
## 
## Factor analysis with Call: fa(r = Z_new4, nfactors = 2, rotate = "varimax", scores = TRUE, 
##     fm = "pa")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 1  and the objective function was  0.05 
## The number of observations was  195  with Chi Square =  8.79  with prob <  0.003 
## 
## The root mean square of the residuals (RMSA) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## Tucker Lewis Index of factoring reliability =  0.835
## RMSEA index =  0.2  and the 90 % confidence intervals are  0.095 0.331
## BIC =  3.51

10. Faktor Score FA

fa_scores <- fa_varimax$scores
head(fa_scores)
##             PA1        PA2
## [1,]  0.7793723  0.3066403
## [2,]  0.5125727 -0.2359794
## [3,] -1.5738855  2.0589759
## [4,]  0.5097425  0.2029549
## [5,]  0.5936671 -0.3747409
## [6,]  0.5621896 -0.6708985

Visualisasi Factor Analysis

1. Diagram Faktor Analysis

fa.diagram(fa_varimax$loadings)

2. Visualisasi Faktor Analysis Barplot

loadings <- as.data.frame(unclass(fa_varimax$loadings))
loadings$Variable <- rownames(loadings)

load_melt <- melt(loadings, id.vars = "Variable")

ggplot(load_melt, aes(x = Variable, y = value)) +
  geom_bar(stat = "identity") +
  facet_wrap(~variable) +
  coord_flip() +
  ggtitle("Factor Loadings (Varimax Rotation)")