Load data

library(psych)
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

library(corrplot)

## corrplot 0.95 loaded

library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(ppcor)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

df <- read_csv("WHR2024.csv")

## Rows: 143 Columns: 11

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Country name
## dbl (10): Ladder score, upperwhisker, lowerwhisker, Explained by: Log GDP pe...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

df_clean <- df %>%
  rename(
    Ladder_Score = `Ladder score`,
    Log_GDP      = `Explained by: Log GDP per capita`,
    Social       = `Explained by: Social support`,
    Life_Exp     = `Explained by: Healthy life expectancy`,
    Freedom      = `Explained by: Freedom to make life choices`,
    Generosity   = `Explained by: Generosity`,
    Corruption   = `Explained by: Perceptions of corruption`
  )

data_raw <- df_clean %>%
  dplyr::select(Ladder_Score, Log_GDP, Social, Life_Exp, Freedom, Generosity, Corruption) %>%
  rename(X1=Ladder_Score,
         X2=Log_GDP,
         X3=Social,
         X4=Life_Exp,
         X5=Freedom,
         X6=Generosity,
         X7=Corruption)

varnames <- colnames(data_raw)

cat("Jumlah observasi Raw Data         :", nrow(data_raw), "\n")

## Jumlah observasi Raw Data         : 143

cat("Jumlah variabel                   :", ncol(data_raw), "\n")

## Jumlah variabel                   : 7

cat("Jumlah missing value              :", sum(is.na(data_raw)), "\n")

## Jumlah missing value              : 18

cat("Jumlah baris yang memiliki missing value :", sum(!complete.cases(data_raw)), "\n")

## Jumlah baris yang memiliki missing value : 3

cat("Rasio obs/var                     :", round(nrow(data_raw)/ncol(data_raw),1), ": 1\n")

## Rasio obs/var                     : 20.4 : 1

Handling Missing Value

data_num <- data_raw %>%
  na.omit()
varnames <- colnames(data_num)
cat("Jumlah observasi setelah hapus NA :", nrow(data_num), "\n")

## Jumlah observasi setelah hapus NA : 140

cat("Jumlah variabel                   :", ncol(data_num), "\n")

## Jumlah variabel                   : 7

cat("Rasio obs/var                     :", round(nrow(data_num)/ncol(data_num),1), ": 1\n")

## Rasio obs/var                     : 20 : 1

Statistika Deskriptif

desc <- describe(data_num)
round(desc[,c("n","mean","sd","min","max","skew","kurtosis")],4)

##      n mean   sd  min  max  skew kurtosis
## X1 140 5.53 1.18 1.72 7.74 -0.52    -0.29
## X2 140 1.38 0.43 0.00 2.14 -0.50    -0.42
## X3 140 1.13 0.33 0.00 1.62 -0.97     0.40
## X4 140 0.52 0.16 0.00 0.86 -0.53    -0.44
## X5 140 0.62 0.16 0.00 0.86 -1.00     1.16
## X6 140 0.15 0.07 0.00 0.40  0.65     0.72
## X7 140 0.15 0.13 0.00 0.58  1.49     1.83

Matriks Correlation

mat_corr <- cor(data_num)
round(mat_corr,3)

##       X1     X2    X3    X4    X5     X6    X7
## X1 1.000  0.769 0.814 0.760 0.644  0.130 0.452
## X2 0.769  1.000 0.727 0.830 0.415 -0.059 0.444
## X3 0.814  0.727 1.000 0.707 0.485  0.079 0.251
## X4 0.760  0.830 0.707 1.000 0.401  0.007 0.399
## X5 0.644  0.415 0.485 0.401 1.000  0.224 0.344
## X6 0.130 -0.059 0.079 0.007 0.224  1.000 0.173
## X7 0.452  0.444 0.251 0.399 0.344  0.173 1.000

corrplot(mat_corr,
         method="color",
         type="upper",
         addCoef.col="black")

bartlett <- cortest.bartlett(mat_corr,n=nrow(data_num))
bartlett

## $chisq
## [1] 601.3328
## 
## $p.value
## [1] 8.386986e-114
## 
## $df
## [1] 21

KMO Test (Iterasi 1)

kmo1 <- KMO(data_num)
cat("Overall MSA:",kmo1$MSA,"\n")

## Overall MSA: 0.8162012

round(kmo1$MSAi,3)

##    X1    X2    X3    X4    X5    X6    X7 
## 0.804 0.822 0.829 0.856 0.826 0.474 0.774

Hapus Variabel dengan MSA <0.5

msa_low <- names(kmo1$MSAi[kmo1$MSAi < 0.5])

if(length(msa_low) > 0){
  cat("Variabel dengan MSA < 0.5:", msa_low, "\n")
  data_num <- data_num %>% dplyr::select(-dplyr::all_of(msa_low))
}

## Variabel dengan MSA < 0.5: X6

varnames <- colnames(data_num)
cat("Variabel tersisa:", varnames, "\n")

## Variabel tersisa: X1 X2 X3 X4 X5 X7

KMO iterasi 2

KMO(data_num)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA =  0.83
## MSA for each item = 
##   X1   X2   X3   X4   X5   X7 
## 0.80 0.84 0.83 0.85 0.82 0.80

mat_corr <- cor(data_num)
round(mat_corr,3)

##       X1    X2    X3    X4    X5    X7
## X1 1.000 0.769 0.814 0.760 0.644 0.452
## X2 0.769 1.000 0.727 0.830 0.415 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.344
## X7 0.452 0.444 0.251 0.399 0.344 1.000

KMO(data_num)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA =  0.83
## MSA for each item = 
##   X1   X2   X3   X4   X5   X7 
## 0.80 0.84 0.83 0.85 0.82 0.80

BartleTt Test Setelah X6 dihapus

mat_corr2 <- cor(data_num)
bartlett2 <- cortest.bartlett(mat_corr2, n = nrow(data_num))
bartlett2

## $chisq
## [1] 583.6702
## 
## $p.value
## [1] 1.043867e-114
## 
## $df
## [1] 15

Standarisasi data

Sebelum masuk ke analisis selanjutnya, data distandarisasi dahulu agar tiap variabel memiliki satuan yang sama.

data_scaled <- scale(data_num)

PCA awal

pca_awal <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
summary(pca_awal)

## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.9767 0.9016 0.8377 0.52700 0.40982 0.36373
## Proportion of Variance 0.6512 0.1355 0.1169 0.04629 0.02799 0.02205
## Cumulative Proportion  0.6512 0.7867 0.9037 0.94996 0.97795 1.00000

Scree plot

eigenvalues <- pca_awal$sdev^2
plot(
  eigenvalues,
  type = "b",
  pch = 1,
  xlab = "Component",
  ylab = "Eigenvalues",
  main = "Scree Plot PCA"
)
abline(h = 1, col = "red", lty = 2)

PCA final

pca_final <- principal(
  data_scaled,
  nfactors = 2,
  rotate = "none"
)
print(pca_final)

## Principal Components Analysis
## Call: principal(r = data_scaled, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     PC1   PC2   h2    u2 com
## X1 0.94 -0.05 0.88 0.122 1.0
## X2 0.89 -0.12 0.81 0.193 1.0
## X3 0.85 -0.33 0.84 0.158 1.3
## X4 0.87 -0.17 0.79 0.207 1.1
## X5 0.66  0.24 0.50 0.503 1.3
## X7 0.55  0.77 0.90 0.096 1.8
## 
##                        PC1  PC2
## SS loadings           3.91 0.81
## Proportion Var        0.65 0.14
## Cumulative Var        0.65 0.79
## Proportion Explained  0.83 0.17
## Cumulative Proportion 0.83 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  16.31  with prob <  0.0026 
## 
## Fit based upon off diagonal values = 0.99

pca_rot <- principal(
  data_scaled,
  nfactors = 2,
  rotate = "varimax"
)
round(pca_rot$loadings[,], 3)

##      RC1   RC2
## X1 0.854 0.384
## X2 0.848 0.296
## X3 0.913 0.092
## X4 0.856 0.245
## X5 0.481 0.516
## X7 0.141 0.940

print(pca_rot)

## Principal Components Analysis
## Call: principal(r = data_scaled, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     RC1  RC2   h2    u2 com
## X1 0.85 0.38 0.88 0.122 1.4
## X2 0.85 0.30 0.81 0.193 1.2
## X3 0.91 0.09 0.84 0.158 1.0
## X4 0.86 0.25 0.79 0.207 1.2
## X5 0.48 0.52 0.50 0.503 2.0
## X7 0.14 0.94 0.90 0.096 1.0
## 
##                        RC1  RC2
## SS loadings           3.27 1.45
## Proportion Var        0.54 0.24
## Cumulative Var        0.54 0.79
## Proportion Explained  0.69 0.31
## Cumulative Proportion 0.69 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  16.31  with prob <  0.0026 
## 
## Fit based upon off diagonal values = 0.99

Loading rot

loadings_rot <- as.data.frame(round(pca_rot$loadings[,], 3))
loadings_rot$Variabel <- rownames(loadings_rot)
loadings_rot

##      RC1   RC2 Variabel
## X1 0.854 0.384       X1
## X2 0.848 0.296       X2
## X3 0.913 0.092       X3
## X4 0.856 0.245       X4
## X5 0.481 0.516       X5
## X7 0.141 0.940       X7

Loading dominan

loadings_rot$Dominan <- ifelse(
  abs(loadings_rot$RC1) > abs(loadings_rot$RC2),
  "RC1", "RC2"
)
loadings_rot

##      RC1   RC2 Variabel Dominan
## X1 0.854 0.384       X1     RC1
## X2 0.848 0.296       X2     RC1
## X3 0.913 0.092       X3     RC1
## X4 0.856 0.245       X4     RC1
## X5 0.481 0.516       X5     RC2
## X7 0.141 0.940       X7     RC2

Visualisasi

#visualisasi loading
fa.diagram(pca_rot)

#biplot PCA
biplot(pca_rot, main = "Biplot PCA setelah Varimax")

fa_final <- fa(
  data_num,
  nfactors = 2,
  rotate = "varimax",
  fm = "ml"
)
print(fa_final)

## Factor Analysis using method =  ml
## Call: fa(r = data_num, nfactors = 2, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     ML2  ML1   h2    u2 com
## X1 0.55 0.83 1.00 0.005 1.7
## X2 0.88 0.34 0.89 0.110 1.3
## X3 0.60 0.58 0.70 0.304 2.0
## X4 0.80 0.38 0.78 0.217 1.4
## X5 0.22 0.63 0.44 0.556 1.2
## X7 0.36 0.30 0.22 0.778 1.9
## 
##                        ML2  ML1
## SS loadings           2.25 1.78
## Proportion Var        0.38 0.30
## Cumulative Var        0.38 0.67
## Proportion Explained  0.56 0.44
## Cumulative Proportion 0.56 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  15  with the objective function =  4.29 with Chi Square =  583.67
## df of  the model are 4  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic n.obs is  140 with the empirical chi square  3.72  with prob <  0.44 
## The total n.obs was  140  with Likelihood Chi Square =  15.16  with prob <  0.0044 
## 
## Tucker Lewis Index of factoring reliability =  0.926
## RMSEA index =  0.141  and the 90 % confidence intervals are  0.071 0.221
## BIC =  -4.61
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML2  ML1
## Correlation of (regression) scores with factors   0.92 0.96
## Multiple R square of scores with factors          0.85 0.93
## Minimum correlation of possible factor scores     0.71 0.85

PCA dan FA pada World Happiness Report 2024

Juli Yawati Manalu

2026-02-23

Load data

Handling Missing Value

Statistika Deskriptif

Matriks Correlation

KMO Test (Iterasi 1)

Hapus Variabel dengan MSA <0.5

KMO iterasi 2

BartleTt Test Setelah X6 dihapus

Standarisasi data

PCA awal

Scree plot

PCA final

Loading rot

Loading dominan

Visualisasi