library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(corrplot)
## corrplot 0.95 loaded
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ppcor)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
df <- read_csv("WHR2024.csv")
## Rows: 143 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Country name
## dbl (10): Ladder score, upperwhisker, lowerwhisker, Explained by: Log GDP pe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_clean <- df %>%
rename(
Ladder_Score = `Ladder score`,
Log_GDP = `Explained by: Log GDP per capita`,
Social = `Explained by: Social support`,
Life_Exp = `Explained by: Healthy life expectancy`,
Freedom = `Explained by: Freedom to make life choices`,
Generosity = `Explained by: Generosity`,
Corruption = `Explained by: Perceptions of corruption`
)
data_raw <- df_clean %>%
dplyr::select(Ladder_Score, Log_GDP, Social, Life_Exp, Freedom, Generosity, Corruption) %>%
rename(X1=Ladder_Score,
X2=Log_GDP,
X3=Social,
X4=Life_Exp,
X5=Freedom,
X6=Generosity,
X7=Corruption)
varnames <- colnames(data_raw)
cat("Jumlah observasi Raw Data :", nrow(data_raw), "\n")
## Jumlah observasi Raw Data : 143
cat("Jumlah variabel :", ncol(data_raw), "\n")
## Jumlah variabel : 7
cat("Jumlah missing value :", sum(is.na(data_raw)), "\n")
## Jumlah missing value : 18
cat("Jumlah baris yang memiliki missing value :", sum(!complete.cases(data_raw)), "\n")
## Jumlah baris yang memiliki missing value : 3
cat("Rasio obs/var :", round(nrow(data_raw)/ncol(data_raw),1), ": 1\n")
## Rasio obs/var : 20.4 : 1
data_num <- data_raw %>%
na.omit()
varnames <- colnames(data_num)
cat("Jumlah observasi setelah hapus NA :", nrow(data_num), "\n")
## Jumlah observasi setelah hapus NA : 140
cat("Jumlah variabel :", ncol(data_num), "\n")
## Jumlah variabel : 7
cat("Rasio obs/var :", round(nrow(data_num)/ncol(data_num),1), ": 1\n")
## Rasio obs/var : 20 : 1
desc <- describe(data_num)
round(desc[,c("n","mean","sd","min","max","skew","kurtosis")],4)
## n mean sd min max skew kurtosis
## X1 140 5.53 1.18 1.72 7.74 -0.52 -0.29
## X2 140 1.38 0.43 0.00 2.14 -0.50 -0.42
## X3 140 1.13 0.33 0.00 1.62 -0.97 0.40
## X4 140 0.52 0.16 0.00 0.86 -0.53 -0.44
## X5 140 0.62 0.16 0.00 0.86 -1.00 1.16
## X6 140 0.15 0.07 0.00 0.40 0.65 0.72
## X7 140 0.15 0.13 0.00 0.58 1.49 1.83
mat_corr <- cor(data_num)
round(mat_corr,3)
## X1 X2 X3 X4 X5 X6 X7
## X1 1.000 0.769 0.814 0.760 0.644 0.130 0.452
## X2 0.769 1.000 0.727 0.830 0.415 -0.059 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.079 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.007 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.224 0.344
## X6 0.130 -0.059 0.079 0.007 0.224 1.000 0.173
## X7 0.452 0.444 0.251 0.399 0.344 0.173 1.000
corrplot(mat_corr,
method="color",
type="upper",
addCoef.col="black")
bartlett <- cortest.bartlett(mat_corr,n=nrow(data_num))
bartlett
## $chisq
## [1] 601.3328
##
## $p.value
## [1] 8.386986e-114
##
## $df
## [1] 21
kmo1 <- KMO(data_num)
cat("Overall MSA:",kmo1$MSA,"\n")
## Overall MSA: 0.8162012
round(kmo1$MSAi,3)
## X1 X2 X3 X4 X5 X6 X7
## 0.804 0.822 0.829 0.856 0.826 0.474 0.774
msa_low <- names(kmo1$MSAi[kmo1$MSAi < 0.5])
if(length(msa_low) > 0){
cat("Variabel dengan MSA < 0.5:", msa_low, "\n")
data_num <- data_num %>% dplyr::select(-dplyr::all_of(msa_low))
}
## Variabel dengan MSA < 0.5: X6
varnames <- colnames(data_num)
cat("Variabel tersisa:", varnames, "\n")
## Variabel tersisa: X1 X2 X3 X4 X5 X7
KMO(data_num)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA = 0.83
## MSA for each item =
## X1 X2 X3 X4 X5 X7
## 0.80 0.84 0.83 0.85 0.82 0.80
mat_corr <- cor(data_num)
round(mat_corr,3)
## X1 X2 X3 X4 X5 X7
## X1 1.000 0.769 0.814 0.760 0.644 0.452
## X2 0.769 1.000 0.727 0.830 0.415 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.344
## X7 0.452 0.444 0.251 0.399 0.344 1.000
KMO(data_num)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA = 0.83
## MSA for each item =
## X1 X2 X3 X4 X5 X7
## 0.80 0.84 0.83 0.85 0.82 0.80
mat_corr2 <- cor(data_num)
bartlett2 <- cortest.bartlett(mat_corr2, n = nrow(data_num))
bartlett2
## $chisq
## [1] 583.6702
##
## $p.value
## [1] 1.043867e-114
##
## $df
## [1] 15
Sebelum masuk ke analisis selanjutnya, data distandarisasi dahulu agar tiap variabel memiliki satuan yang sama.
data_scaled <- scale(data_num)
pca_awal <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
summary(pca_awal)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.9767 0.9016 0.8377 0.52700 0.40982 0.36373
## Proportion of Variance 0.6512 0.1355 0.1169 0.04629 0.02799 0.02205
## Cumulative Proportion 0.6512 0.7867 0.9037 0.94996 0.97795 1.00000
eigenvalues <- pca_awal$sdev^2
plot(
eigenvalues,
type = "b",
pch = 1,
xlab = "Component",
ylab = "Eigenvalues",
main = "Scree Plot PCA"
)
abline(h = 1, col = "red", lty = 2)
pca_final <- principal(
data_scaled,
nfactors = 2,
rotate = "none"
)
print(pca_final)
## Principal Components Analysis
## Call: principal(r = data_scaled, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 h2 u2 com
## X1 0.94 -0.05 0.88 0.122 1.0
## X2 0.89 -0.12 0.81 0.193 1.0
## X3 0.85 -0.33 0.84 0.158 1.3
## X4 0.87 -0.17 0.79 0.207 1.1
## X5 0.66 0.24 0.50 0.503 1.3
## X7 0.55 0.77 0.90 0.096 1.8
##
## PC1 PC2
## SS loadings 3.91 0.81
## Proportion Var 0.65 0.14
## Cumulative Var 0.65 0.79
## Proportion Explained 0.83 0.17
## Cumulative Proportion 0.83 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 16.31 with prob < 0.0026
##
## Fit based upon off diagonal values = 0.99
pca_rot <- principal(
data_scaled,
nfactors = 2,
rotate = "varimax"
)
round(pca_rot$loadings[,], 3)
## RC1 RC2
## X1 0.854 0.384
## X2 0.848 0.296
## X3 0.913 0.092
## X4 0.856 0.245
## X5 0.481 0.516
## X7 0.141 0.940
print(pca_rot)
## Principal Components Analysis
## Call: principal(r = data_scaled, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## X1 0.85 0.38 0.88 0.122 1.4
## X2 0.85 0.30 0.81 0.193 1.2
## X3 0.91 0.09 0.84 0.158 1.0
## X4 0.86 0.25 0.79 0.207 1.2
## X5 0.48 0.52 0.50 0.503 2.0
## X7 0.14 0.94 0.90 0.096 1.0
##
## RC1 RC2
## SS loadings 3.27 1.45
## Proportion Var 0.54 0.24
## Cumulative Var 0.54 0.79
## Proportion Explained 0.69 0.31
## Cumulative Proportion 0.69 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 16.31 with prob < 0.0026
##
## Fit based upon off diagonal values = 0.99
loadings_rot <- as.data.frame(round(pca_rot$loadings[,], 3))
loadings_rot$Variabel <- rownames(loadings_rot)
loadings_rot
## RC1 RC2 Variabel
## X1 0.854 0.384 X1
## X2 0.848 0.296 X2
## X3 0.913 0.092 X3
## X4 0.856 0.245 X4
## X5 0.481 0.516 X5
## X7 0.141 0.940 X7
loadings_rot$Dominan <- ifelse(
abs(loadings_rot$RC1) > abs(loadings_rot$RC2),
"RC1", "RC2"
)
loadings_rot
## RC1 RC2 Variabel Dominan
## X1 0.854 0.384 X1 RC1
## X2 0.848 0.296 X2 RC1
## X3 0.913 0.092 X3 RC1
## X4 0.856 0.245 X4 RC1
## X5 0.481 0.516 X5 RC2
## X7 0.141 0.940 X7 RC2
#visualisasi loading
fa.diagram(pca_rot)
#biplot PCA
biplot(pca_rot, main = "Biplot PCA setelah Varimax")
fa_final <- fa(
data_num,
nfactors = 2,
rotate = "varimax",
fm = "ml"
)
print(fa_final)
## Factor Analysis using method = ml
## Call: fa(r = data_num, nfactors = 2, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 h2 u2 com
## X1 0.55 0.83 1.00 0.005 1.7
## X2 0.88 0.34 0.89 0.110 1.3
## X3 0.60 0.58 0.70 0.304 2.0
## X4 0.80 0.38 0.78 0.217 1.4
## X5 0.22 0.63 0.44 0.556 1.2
## X7 0.36 0.30 0.22 0.778 1.9
##
## ML2 ML1
## SS loadings 2.25 1.78
## Proportion Var 0.38 0.30
## Cumulative Var 0.38 0.67
## Proportion Explained 0.56 0.44
## Cumulative Proportion 0.56 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 15 with the objective function = 4.29 with Chi Square = 583.67
## df of the model are 4 and the objective function was 0.11
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 140 with the empirical chi square 3.72 with prob < 0.44
## The total n.obs was 140 with Likelihood Chi Square = 15.16 with prob < 0.0044
##
## Tucker Lewis Index of factoring reliability = 0.926
## RMSEA index = 0.141 and the 90 % confidence intervals are 0.071 0.221
## BIC = -4.61
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML2 ML1
## Correlation of (regression) scores with factors 0.92 0.96
## Multiple R square of scores with factors 0.85 0.93
## Minimum correlation of possible factor scores 0.71 0.85