library(psych)
library(ggplot2)
library(corrplot)
library(factoextra)
library(dplyr)
library(readr)
library(ppcor)

# Read the World Happiness Report 2024 CSV export into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact location exists.
df <- readr::read_csv(
  "C:/Users/USER/Documents/PCA/World-happiness-report-2024.csv"
)

# Replace the ladder score and the six "Explained by: ..." headers with short
# syntactic names. Every other column of `df` is carried over untouched.
df_clean <- dplyr::rename(
  df,
  Ladder_Score = `Ladder score`,
  Log_GDP      = `Explained by: Log GDP per capita`,
  Social       = `Explained by: Social support`,
  Life_Exp     = `Explained by: Healthy life expectancy`,
  Freedom      = `Explained by: Freedom to make life choices`,
  Generosity   = `Explained by: Generosity`,
  Corruption   = `Explained by: Perceptions of corruption`
)

# Keep only the seven analysis variables and relabel them X1..X7 in the same
# call (`new = old` pairs inside select() both pick and rename a column).
data_raw <- dplyr::select(
  df_clean,
  X1 = Ladder_Score,
  X2 = Log_GDP,
  X3 = Social,
  X4 = Life_Exp,
  X5 = Freedom,
  X6 = Generosity,
  X7 = Corruption
)

# Remember the variable labels of the raw analysis table.
varnames <- names(data_raw)

# Size of the raw table: rows (observations) and columns (variables).
cat("Jumlah observasi Raw Data         :", NROW(data_raw), "\n")
## Jumlah observasi Raw Data         : 143
cat("Jumlah variabel                   :", NCOL(data_raw), "\n")
## Jumlah variabel                   : 7
# Total number of missing cells across the whole table.
cat("Jumlah missing value              :", sum(is.na(data_raw)), "\n")
## Jumlah missing value              : 18
# Number of rows that are not complete, i.e. contain at least one NA.
cat(
  "Jumlah baris yang memiliki missing value :",
  nrow(data_raw) - sum(complete.cases(data_raw)),
  "\n"
)
## Jumlah baris yang memiliki missing value : 3
# Observations-per-variable ratio, rounded to one decimal place.
cat(
  "Rasio obs/var                     :",
  round(nrow(data_raw) / ncol(data_raw), digits = 1),
  ": 1\n"
)
## Rasio obs/var                     : 20.4 : 1

Handling Missing Value

# Listwise deletion: discard every row of the raw table that contains an NA.
data_num <- na.omit(data_raw)

# Refresh the stored variable labels from the cleaned table.
varnames <- names(data_num)

# Report the size of the cleaned table and the new obs-per-variable ratio.
cat("Jumlah observasi setelah hapus NA :", NROW(data_num), "\n")
## Jumlah observasi setelah hapus NA : 140
cat("Jumlah variabel                   :", NCOL(data_num), "\n")
## Jumlah variabel                   : 7
cat(
  "Rasio obs/var                     :",
  round(nrow(data_num) / ncol(data_num), digits = 1),
  ": 1\n"
)
## Rasio obs/var                     : 20 : 1

Statistika Deskriptif

# Per-variable descriptive statistics from psych::describe().
desc <- psych::describe(data_num)

# Show only the columns of interest, rounded to four decimals.
round(
  desc[, c("n", "mean", "sd", "min", "max", "skew", "kurtosis")],
  digits = 4
)
##      n mean   sd  min  max  skew kurtosis
## X1 140 5.53 1.18 1.72 7.74 -0.52    -0.29
## X2 140 1.38 0.43 0.00 2.14 -0.50    -0.42
## X3 140 1.13 0.33 0.00 1.62 -0.97     0.40
## X4 140 0.52 0.16 0.00 0.86 -0.53    -0.44
## X5 140 0.62 0.16 0.00 0.86 -1.00     1.16
## X6 140 0.15 0.07 0.00 0.40  0.65     0.72
## X7 140 0.15 0.13 0.00 0.58  1.49     1.83

Matriks Korelasi

# Correlation matrix of all variables in the cleaned table (cor() defaults).
mat_corr <- cor(x = data_num)

# Print the matrix rounded to three decimals.
round(mat_corr, digits = 3)
##       X1     X2    X3    X4    X5     X6    X7
## X1 1.000  0.769 0.814 0.760 0.644  0.130 0.452
## X2 0.769  1.000 0.727 0.830 0.415 -0.059 0.444
## X3 0.814  0.727 1.000 0.707 0.485  0.079 0.251
## X4 0.760  0.830 0.707 1.000 0.401  0.007 0.399
## X5 0.644  0.415 0.485 0.401 1.000  0.224 0.344
## X6 0.130 -0.059 0.079 0.007 0.224  1.000 0.173
## X7 0.452  0.444 0.251 0.399 0.344  0.173 1.000

# Colour-coded plot of the upper triangle with the coefficients printed in
# black inside each cell.
corrplot::corrplot(
  mat_corr,
  method = "color",
  type = "upper",
  addCoef.col = "black"
)

## Uji Asumsi # Bartlett Test

# Bartlett's test on the correlation matrix; the sample size is the number of
# rows in the cleaned table.
bartlett <- psych::cortest.bartlett(R = mat_corr, n = nrow(data_num))

# Print the chi-square statistic, p-value and degrees of freedom.
bartlett
## $chisq
## [1] 601.3328
## 
## $p.value
## [1] 8.386986e-114
## 
## $df
## [1] 21

KMO Test (Iterasi 1)

# First Kaiser-Meyer-Olkin pass on all variables; the result is kept in
# `kmo1` because the per-variable MSA values are reused for screening.
kmo1 <- KMO(r = data_num)

# Overall measure of sampling adequacy.
cat("Overall MSA:", kmo1[["MSA"]], "\n")
## Overall MSA: 0.8162012
# Per-variable MSA, rounded to three decimals.
round(kmo1[["MSAi"]], digits = 3)
##    X1    X2    X3    X4    X5    X6    X7 
## 0.804 0.822 0.829 0.856 0.826 0.474 0.774

Hapus variabel dengan MSA < 0.5

# Names of the variables whose individual MSA from the first KMO pass is
# below the 0.5 cut-off.
msa_low <- names(kmo1$MSAi)[kmo1$MSAi < 0.5]

# Only when at least one variable was flagged: report it and remove it from
# the analysis table.
if (length(msa_low) > 0) {
  cat("Variabel dengan MSA < 0.5:", msa_low, "\n")
  data_num <- dplyr::select(data_num, -dplyr::all_of(msa_low))
}
## Variabel dengan MSA < 0.5: X6

# Refresh the stored labels and list the variables that remain.
varnames <- names(data_num)
cat("Variabel tersisa:", varnames, "\n")
## Variabel tersisa: X1 X2 X3 X4 X5 X7

KMO Iterasi 2

# Second KMO pass on the variables that remain in `data_num`. The result is
# stored as `kmo2` (mirroring `kmo1` from the first pass) and printed once;
# the original ran and printed the identical KMO call a second time after the
# correlation matrix, which only duplicated the output.
kmo2 <- KMO(r = data_num)
kmo2
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA =  0.83
## MSA for each item = 
##   X1   X2   X3   X4   X5   X7 
## 0.80 0.84 0.83 0.85 0.82 0.80

# Recompute the correlation matrix so `mat_corr` reflects the reduced
# variable set, then print it rounded to three decimals.
mat_corr <- cor(data_num)

round(mat_corr, 3)
##       X1    X2    X3    X4    X5    X7
## X1 1.000 0.769 0.814 0.760 0.644 0.452
## X2 0.769 1.000 0.727 0.830 0.415 0.444
## X3 0.814 0.727 1.000 0.707 0.485 0.251
## X4 0.760 0.830 0.707 1.000 0.401 0.399
## X5 0.644 0.415 0.485 0.401 1.000 0.344
## X7 0.452 0.444 0.251 0.399 0.344 1.000

Bartlett Test Setelah X6 dihapus

# Correlation matrix of the current `data_num`, kept under its own name.
mat_corr2 <- cor(x = data_num)

# Bartlett's test on that matrix, with the row count of `data_num` as the
# sample size.
bartlett2 <- psych::cortest.bartlett(R = mat_corr2, n = nrow(data_num))

# Print the chi-square statistic, p-value and degrees of freedom.
bartlett2
## $chisq
## [1] 583.6702
## 
## $p.value
## [1] 1.043867e-114
## 
## $df
## [1] 15