library(psych)
library(ggplot2)
library(corrplot)
library(factoextra)
library(dplyr)
library(readr)
library(ppcor)
# Load the World Happiness Report 2024 data set.
# NOTE(review): this path is absolute and machine-specific, so the script only
# runs where the file lives at exactly this location.
df <- read_csv("C:/Users/USER/Documents/PCA/World-happiness-report-2024.csv")

# Swap the spaced CSV headers of the analysis columns for syntactic names.
# rename() leaves every other column of `df` in place.
df_clean <- dplyr::rename(
  df,
  Ladder_Score = `Ladder score`,
  Log_GDP      = `Explained by: Log GDP per capita`,
  Social       = `Explained by: Social support`,
  Life_Exp     = `Explained by: Healthy life expectancy`,
  Freedom      = `Explained by: Freedom to make life choices`,
  Generosity   = `Explained by: Generosity`,
  Corruption   = `Explained by: Perceptions of corruption`
)

# Keep only the seven analysis variables and relabel them X1..X7 in one step
# (select() renames when given `new = old` pairs). dplyr::select is qualified
# because another attached package may mask `select`.
data_raw <- dplyr::select(
  df_clean,
  X1 = Ladder_Score,
  X2 = Log_GDP,
  X3 = Social,
  X4 = Life_Exp,
  X5 = Freedom,
  X6 = Generosity,
  X7 = Corruption
)
# Column labels of the raw analysis table.
varnames <- colnames(data_raw)

# Size of the raw table: rows are observations, columns are variables.
cat("Jumlah observasi Raw Data :", nrow(data_raw), "\n")
## Jumlah observasi Raw Data : 143
cat("Jumlah variabel :", ncol(data_raw), "\n")
## Jumlah variabel : 7

# Missingness: total number of NA cells, then the number of rows that contain
# at least one NA (all rows minus the complete ones).
cat("Jumlah missing value :", sum(is.na(data_raw)), "\n")
## Jumlah missing value : 18
cat(
  "Jumlah baris yang memiliki missing value :",
  nrow(data_raw) - sum(complete.cases(data_raw)),
  "\n"
)
## Jumlah baris yang memiliki missing value : 3

# Observations-per-variable ratio, rounded to one decimal place.
cat("Rasio obs/var :", round(nrow(data_raw) / ncol(data_raw), 1), ": 1\n")
## Rasio obs/var : 20.4 : 1
# Listwise deletion: drop every row that has at least one missing value.
data_num <- na.omit(data_raw)

# Refresh the column labels so they refer to the cleaned table.
varnames <- colnames(data_num)

# Size of the cleaned table and the resulting observations-per-variable ratio.
cat("Jumlah observasi setelah hapus NA :", nrow(data_num), "\n")
## Jumlah observasi setelah hapus NA : 140
cat("Jumlah variabel :", ncol(data_num), "\n")
## Jumlah variabel : 7
cat("Rasio obs/var :", round(nrow(data_num) / ncol(data_num), 1), ": 1\n")
## Rasio obs/var : 20 : 1
# Descriptive statistics for every variable of the cleaned table.
desc <- describe(data_num)

# Show only the columns of interest, rounded to four decimal places.
round(
  desc[, c("n", "mean", "sd", "min", "max", "skew", "kurtosis")],
  4
)
## n mean sd min max skew kurtosis
## X1 140 5.53 1.18 1.72 7.74 -0.52 -0.29
## X2 140 1.38 0.43 0.00 2.14 -0.50 -0.42
## X3 140 1.13 0.33 0.00 1.62 -0.97 0.40
## X4 140 0.52 0.16 0.00 0.86 -0.53 -0.44
## X5 140 0.62 0.16 0.00 0.86 -1.00 1.16
## X6 140 0.15 0.07 0.00 0.40 0.65 0.72
## X7 140 0.15 0.13 0.00 0.58 1.49 1.83
# Correlation matrix of all variables in the cleaned table (cor() defaults).
mat_corr <- cor(data_num)
round(mat_corr, 3)
## X1 X2 X3 X4 X5 X6 X7
## X1 1.000 0.769 0.814 0.760 0.644 0.130 0.452
## X2 0.769 1.000 0.727 0.830 0.415 -0.059 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.079 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.007 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.224 0.344
## X6 0.130 -0.059 0.079 0.007 0.224 1.000 0.173
## X7 0.452 0.444 0.251 0.399 0.344 0.173 1.000

# Heat map of the upper triangle with the coefficients printed in black.
corrplot(
  mat_corr,
  method = "color",
  type = "upper",
  addCoef.col = "black"
)
# Assumption checks ----
# Bartlett's test on the correlation matrix, using the number of complete
# observations as the sample size.
bartlett <- cortest.bartlett(R = mat_corr, n = nrow(data_num))
bartlett
## $chisq
## [1] 601.3328
##
## $p.value
## [1] 8.386986e-114
##
## $df
## [1] 21
# Kaiser-Meyer-Olkin measure of sampling adequacy: overall MSA first, then the
# per-variable MSA values rounded to three decimals.
kmo1 <- KMO(data_num)
cat("Overall MSA:", kmo1$MSA, "\n")
## Overall MSA: 0.8162012
round(kmo1$MSAi, 3)
## X1 X2 X3 X4 X5 X6 X7
## 0.804 0.822 0.829 0.856 0.826 0.474 0.774

# Names of the variables whose individual MSA falls below the 0.5 cut-off.
msa_low <- names(kmo1$MSAi)[kmo1$MSAi < 0.5]

# Report and drop those variables, all at once; nothing happens if none
# qualify.
if (length(msa_low) > 0) {
  cat("Variabel dengan MSA < 0.5:", msa_low, "\n")
  data_num <- dplyr::select(data_num, -dplyr::all_of(msa_low))
}
## Variabel dengan MSA < 0.5: X6

# Refresh the column labels and list the variables that remain.
varnames <- colnames(data_num)
cat("Variabel tersisa:", varnames, "\n")
## Variabel tersisa: X1 X2 X3 X4 X5 X7
# Re-check the assumptions on the current `data_num`, i.e. after the
# low-MSA variable removal step.

# KMO on the current variable set. It is run once: the data does not change
# within this section, so a second call would only repeat the same output.
KMO(data_num)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA = 0.83
## MSA for each item =
## X1 X2 X3 X4 X5 X7
## 0.80 0.84 0.83 0.85 0.82 0.80

# Correlation matrix of the current variable set; overwrites the earlier
# `mat_corr`.
mat_corr <- cor(data_num)
round(mat_corr, 3)
## X1 X2 X3 X4 X5 X7
## X1 1.000 0.769 0.814 0.760 0.644 0.452
## X2 0.769 1.000 0.727 0.830 0.415 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.344
## X7 0.452 0.444 0.251 0.399 0.344 1.000

# Bartlett's test on the current variable set. `mat_corr2` is kept as its own
# name but reuses the matrix computed just above rather than calling
# cor(data_num) a second time on identical data.
mat_corr2 <- mat_corr
bartlett2 <- cortest.bartlett(mat_corr2, n = nrow(data_num))
bartlett2
## $chisq
## [1] 583.6702
##
## $p.value
## [1] 1.043867e-114
##
## $df
## [1] 15