library(psych)
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library(corrplot)
## corrplot 0.95 loaded
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
MUAT DATA
data_mentah <- read.csv("Life Expectancy Data.csv", stringsAsFactors = FALSE)
names(data_mentah) <- trimws(names(data_mentah))
# 18 variabel
data_pca <- data_mentah[, c(
"Adult.Mortality", "infant.deaths", "Alcohol",
"percentage.expenditure", "Hepatitis.B", "Measles",
"BMI", "under.five.deaths", "Polio", "Total.expenditure",
"Diphtheria", "HIV.AIDS", "GDP", "Population",
"thinness..1.19.years", "thinness.5.9.years",
"Income.composition.of.resources", "Schooling"
)]
colnames(data_pca) <- c(
"MortAlat", "KematianBayi", "Alkohol", "PctPengeluaran",
"HepB", "Campak", "BMI", "KematianBalita", "Polio",
"TotPengeluaran", "Difteri", "HIVAIDS", "GDP", "Populasi",
"Kurus1019", "Kurus59", "KomposisiIncome", "Pendidikan"
)
cat("Dimensi awal:", dim(data_pca), "\n")
## Dimensi awal: 2938 18
cat("Missing value per kolom:\n")
## Missing value per kolom:
print(colSums(is.na(data_pca)))
## MortAlat KematianBayi Alkohol PctPengeluaran HepB
## 10 0 194 0 553
## Campak BMI KematianBalita Polio TotPengeluaran
## 0 34 0 19 226
## Difteri HIVAIDS GDP Populasi Kurus1019
## 19 0 448 652 34
## Kurus59 KomposisiIncome Pendidikan
## 34 167 163
IMPUTASI MISSING VALUE PAKAI MEDIAN
data_lengkap <- data_pca
for (kol in colnames(data_lengkap)) {
if (any(is.na(data_lengkap[[kol]]))) {
data_lengkap[[kol]][is.na(data_lengkap[[kol]])] <- median(data_lengkap[[kol]], na.rm = TRUE)
}
}
cat("Missing setelah imputasi:", sum(is.na(data_lengkap)), "\n")
## Missing setelah imputasi: 0
cat("Dimensi akhir:", dim(data_lengkap), "\n")
## Dimensi akhir: 2938 18
STATISTIKA DESKRIPTIF & BOXPLOT
desc <- describe(data_lengkap)
print(round(desc[, c("n","mean","sd","min","max","skew","kurtosis")], 3))
## n mean sd min max skew kurtosis
## MortAlat 2938 164.73 124.09 1.00 7.230000e+02 1.18 1.76
## KematianBayi 2938 30.30 117.93 0.00 1.800000e+03 9.78 115.76
## Alkohol 2938 4.55 3.92 0.01 1.787000e+01 0.65 -0.63
## PctPengeluaran 2938 738.25 1987.91 0.00 1.947991e+04 4.65 26.51
## HepB 2938 83.02 23.00 1.00 9.900000e+01 -2.28 4.39
## Campak 2938 2419.59 11467.27 0.00 2.121830e+05 9.43 114.58
## BMI 2938 38.38 19.93 1.00 8.730000e+01 -0.23 -1.27
## KematianBalita 2938 42.04 160.45 0.00 2.500000e+03 9.48 109.49
## Polio 2938 82.62 23.37 3.00 9.900000e+01 -2.11 3.81
## TotPengeluaran 2938 5.92 2.40 0.37 1.760000e+01 0.66 1.51
## Difteri 2938 82.39 23.66 2.00 9.900000e+01 -2.08 3.60
## HIVAIDS 2938 1.74 5.08 0.10 5.060000e+01 5.39 34.80
## GDP 2938 6611.52 13296.60 1.68 1.191727e+05 3.54 15.10
## Populasi 2938 10230851.23 54022417.45 34.00 1.293859e+09 17.95 380.22
## Kurus1019 2938 4.82 4.40 0.10 2.770000e+01 1.73 4.05
## Kurus59 2938 4.85 4.49 0.10 2.860000e+01 1.79 4.44
## KomposisiIncome 2938 0.63 0.20 0.00 9.500000e-01 -1.21 1.69
## Pendidikan 2938 12.01 3.27 0.00 2.070000e+01 -0.63 1.12
boxplot(scale(data_lengkap),
main = "Gambar Boxplot ",
las = 2, col = "steelblue", cex.axis = 0.7)
UJI ASUMSI
# Matriks korelasi
mat_cor <- cor(data_lengkap)
corrplot(mat_cor, method = "color", type = "upper",
tl.cex = 0.7, addCoef.col = "black", number.cex = 0.45,
title = "Gambar Matriks Korelasi", mar = c(0,0,2,0))
# Bartlett (buang variabel sangat kolinier sebelum uji)
var_bart <- c("MortAlat","Alkohol","PctPengeluaran","HepB","Campak",
"BMI","Polio","TotPengeluaran","Difteri","HIVAIDS",
"GDP","Populasi","Kurus1019","KomposisiIncome","Pendidikan")
mat_bart <- cor(data_lengkap[, var_bart])
bart <- cortest.bartlett(mat_bart, n = nrow(data_lengkap), diag = TRUE)
cat(sprintf("\nBartlett: Chi-sq=%.3f, df=%d, p=%.6f\n",
bart$chisq, bart$df, bart$p.value))
##
## Bartlett: Chi-sq=17868.576, df=105, p=0.000000
# KMO
kmo <- KMO(mat_cor)
cat(sprintf("KMO: %.4f\n", kmo$MSA))
## KMO: 0.7787
PCA
model_pca <- prcomp(data_lengkap, scale. = TRUE)
nilai_eigen <- get_eigenvalue(model_pca)
print(round(nilai_eigen, 4))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 5.4789 30.4381 30.4381
## Dim.2 2.5789 14.3272 44.7653
## Dim.3 1.6893 9.3849 54.1502
## Dim.4 1.3798 7.6654 61.8156
## Dim.5 1.2109 6.7270 68.5426
## Dim.6 0.8689 4.8272 73.3698
## Dim.7 0.8330 4.6279 77.9977
## Dim.8 0.7452 4.1400 82.1377
## Dim.9 0.6143 3.4126 85.5503
## Dim.10 0.5872 3.2620 88.8123
## Dim.11 0.4983 2.7685 91.5808
## Dim.12 0.4394 2.4409 94.0216
## Dim.13 0.4127 2.2925 96.3142
## Dim.14 0.3155 1.7529 98.0671
## Dim.15 0.1947 1.0815 99.1486
## Dim.16 0.0901 0.5007 99.6493
## Dim.17 0.0603 0.3350 99.9842
## Dim.18 0.0028 0.0158 100.0000
jumlah_pc <- sum(nilai_eigen$eigenvalue > 1)
cat(sprintf("Jumlah PC (Kaiser): %d | Kumulatif: %.2f%%\n",
jumlah_pc, nilai_eigen$cumulative.variance.percent[jumlah_pc]))
## Jumlah PC (Kaiser): 5 | Kumulatif: 68.54%
SCREE PLOT
fviz_screeplot(model_pca, addlabels = TRUE,
main = "Gambar Scree Plot PCA") +
geom_hline(yintercept = 100/ncol(data_lengkap),
linetype = "dashed", color = "red") +
theme_minimal()
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
LOADING(UNROTATED)
pca_unrot <- principal(data_lengkap, nfactors = jumlah_pc,
rotate = "none", scores = TRUE)
print(pca_unrot$loadings, cutoff = 0)
##
## Loadings:
## PC1 PC2 PC3 PC4 PC5
## MortAlat -0.525 -0.328 -0.148 0.339 0.401
## KematianBayi -0.556 0.752 0.026 -0.103 0.136
## Alkohol 0.549 0.232 -0.208 0.052 0.416
## PctPengeluaran 0.507 0.373 -0.422 0.525 -0.232
## HepB 0.293 -0.043 0.604 0.268 0.062
## Campak -0.353 0.488 -0.030 -0.130 0.130
## BMI 0.666 0.110 -0.031 -0.298 0.086
## KematianBalita -0.568 0.740 0.009 -0.099 0.142
## Polio 0.514 0.132 0.607 0.193 0.112
## TotPengeluaran 0.350 0.014 -0.128 -0.013 0.515
## Difteri 0.523 0.129 0.646 0.204 0.128
## HIVAIDS -0.327 -0.230 -0.194 0.473 0.541
## GDP 0.544 0.381 -0.365 0.511 -0.286
## Populasi -0.284 0.614 0.045 -0.131 0.150
## Kurus1019 -0.764 0.183 0.225 0.320 -0.178
## Kurus59 -0.763 0.187 0.226 0.322 -0.171
## KomposisiIncome 0.703 0.360 -0.003 -0.028 -0.024
## Pendidikan 0.746 0.326 -0.009 -0.011 0.063
##
## PC1 PC2 PC3 PC4 PC5
## SS loadings 5.479 2.579 1.689 1.380 1.211
## Proportion Var 0.304 0.143 0.094 0.077 0.067
## Cumulative Var 0.304 0.448 0.542 0.618 0.685
KOMUNALITAS PCA
print(data.frame(Variabel = colnames(data_lengkap),
h2 = round(pca_unrot$communality, 4)))
## Variabel h2
## MortAlat MortAlat 0.6808
## KematianBayi KematianBayi 0.9042
## Alkohol Alkohol 0.5734
## PctPengeluaran PctPengeluaran 0.9044
## HepB HepB 0.5285
## Campak Campak 0.3980
## BMI BMI 0.5534
## KematianBalita KematianBalita 0.9004
## Polio Polio 0.6998
## TotPengeluaran TotPengeluaran 0.4050
## Difteri Difteri 0.7659
## HIVAIDS HIVAIDS 0.7137
## GDP GDP 0.9173
## Populasi Populasi 0.4988
## Kurus1019 Kurus1019 0.8015
## Kurus59 Kurus59 0.8010
## KomposisiIncome KomposisiIncome 0.6242
## Pendidikan Pendidikan 0.6673
LOADING VARIMAX
pca_varimax <- principal(data_lengkap, nfactors = jumlah_pc,
rotate = "varimax", scores = TRUE)
cat("\nLoading PCA Varimax (cutoff 0.40)\n")
##
## Loading PCA Varimax (cutoff 0.40)
print(pca_varimax$loadings, cutoff = 0.40)
##
## Loadings:
## RC1 RC2 RC4 RC3 RC5
## MortAlat 0.766
## KematianBayi 0.930
## Alkohol 0.680
## PctPengeluaran 0.934
## HepB 0.711
## Campak 0.617
## BMI 0.594
## KematianBalita 0.925
## Polio 0.796
## TotPengeluaran 0.593
## Difteri 0.838
## HIVAIDS 0.843
## GDP 0.935
## Populasi 0.705
## Kurus1019 -0.747 0.405
## Kurus59 -0.743 0.410
## KomposisiIncome 0.480 0.406
## Pendidikan 0.556
##
## RC1 RC2 RC4 RC3 RC5
## SS loadings 3.028 2.986 2.216 2.129 1.979
## Proportion Var 0.168 0.166 0.123 0.118 0.110
## Cumulative Var 0.168 0.334 0.457 0.575 0.685
cat("\nSemua Loading PCA Varimax\n")
##
## Semua Loading PCA Varimax
print(round(pca_varimax$loadings[], 3))
## RC1 RC2 RC4 RC3 RC5
## MortAlat -0.178 -0.011 -0.182 -0.169 0.766
## KematianBayi -0.171 0.930 -0.024 -0.095 0.027
## Alkohol 0.680 0.053 0.293 0.134 0.064
## PctPengeluaran 0.166 -0.048 0.934 0.012 -0.052
## HepB -0.029 -0.147 -0.008 0.711 -0.002
## Campak -0.047 0.617 -0.047 -0.113 0.012
## BMI 0.594 -0.128 0.094 0.135 -0.396
## KematianBalita -0.173 0.925 -0.026 -0.112 0.044
## Polio 0.183 -0.067 0.083 0.796 -0.145
## TotPengeluaran 0.593 -0.012 0.022 0.098 0.207
## Difteri 0.185 -0.069 0.071 0.838 -0.138
## HIVAIDS 0.018 0.003 -0.006 -0.057 0.843
## GDP 0.145 -0.066 0.935 0.061 -0.120
## Populasi 0.002 0.705 -0.007 -0.006 -0.048
## Kurus1019 -0.747 0.405 -0.050 0.024 0.275
## Kurus59 -0.743 0.410 -0.049 0.027 0.279
## KomposisiIncome 0.480 0.009 0.406 0.277 -0.390
## Pendidikan 0.556 -0.019 0.395 0.304 -0.331
cat("\nVarians Setelah Rotasi\n")
##
## Varians Setelah Rotasi
print(round(pca_varimax$Vaccounted, 3))
## RC1 RC2 RC4 RC3 RC5
## SS loadings 3.028 2.986 2.216 2.129 1.979
## Proportion Var 0.168 0.166 0.123 0.118 0.110
## Cumulative Var 0.168 0.334 0.457 0.575 0.685
## Proportion Explained 0.245 0.242 0.180 0.173 0.160
## Cumulative Proportion 0.245 0.487 0.667 0.840 1.000
BIPLOT
fviz_pca_biplot(model_pca, repel = FALSE,
col.var = "steelblue", col.ind = "gray80",
label = "var", alpha.ind = 0.3,
title = "Gambar Biplot PCA") +
theme_minimal()
KONTRIBUSI TIAP VARIABLE
fviz_contrib(model_pca, choice = "var", axes = 1,
title = " Kontribusi ke PC1")
fviz_contrib(model_pca, choice = "var", axes = 2,
title = "Kontribusi ke PC2")
SKOR PCA
skor_pca <- as.data.frame(pca_varimax$scores)
colnames(skor_pca) <- paste0("PC", 1:jumlah_pc)
head(skor_pca)
FAKTOR ANALISIS
jumlah_faktor <- jumlah_pc
cat("Jumlah faktor:", jumlah_faktor, "\n")
## Jumlah faktor: 5
# Unrotated
fa_unrot <- fa(data_lengkap, nfactors = jumlah_faktor,
rotate = "none", fm = "pa", scores = "regression")
cat("\nLoading FA Unrotated\n")
##
## Loading FA Unrotated
print(fa_unrot$loadings, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4 PA5
## MortAlat -0.498 -0.300 -0.183 0.150 0.485
## KematianBayi -0.587 0.771 0.082 -0.193 0.118
## Alkohol 0.497 0.203 -0.090 -0.094 0.259
## PctPengeluaran 0.510 0.418 -0.537 0.375 -0.013
## HepB 0.258 -0.023 0.339 0.234 0.071
## Campak -0.320 0.355 0.003 -0.120 0.043
## BMI 0.614 0.106 0.060 -0.221 -0.002
## KematianBalita -0.598 0.755 0.061 -0.195 0.127
## Polio 0.483 0.130 0.491 0.264 0.130
## TotPengeluaran 0.303 0.022 -0.031 -0.074 0.187
## Difteri 0.507 0.137 0.579 0.314 0.172
## HIVAIDS -0.298 -0.188 -0.186 0.176 0.483
## GDP 0.549 0.428 -0.486 0.389 -0.063
## Populasi -0.264 0.463 0.071 -0.104 0.050
## Kurus1019 -0.770 0.160 0.118 0.432 -0.165
## Kurus59 -0.767 0.164 0.121 0.429 -0.154
## KomposisiIncome 0.658 0.331 0.059 -0.040 -0.029
## Pendidikan 0.706 0.309 0.060 -0.041 0.031
##
## PA1 PA2 PA3 PA4 PA5
## SS loadings 5.159 2.334 1.347 1.107 0.714
## Proportion Var 0.287 0.130 0.075 0.061 0.040
## Cumulative Var 0.287 0.416 0.491 0.553 0.592
KOMUNALITAS SPECIFIC VARIANCE
cat("\nKomunalitas (h2) & Specific Variance (Psi)\n")
##
## Komunalitas (h2) & Specific Variance (Psi)
print(data.frame(
Variabel = colnames(data_lengkap),
h2 = round(fa_unrot$communality, 4),
Psi = round(fa_unrot$uniquenesses, 4)
))
## Variabel h2 Psi
## MortAlat MortAlat 0.6300 0.3700
## KematianBayi KematianBayi 0.9965 0.0035
## Alkohol Alkohol 0.3725 0.6275
## PctPengeluaran PctPengeluaran 0.8651 0.1349
## HepB HepB 0.2421 0.7579
## Campak Campak 0.2453 0.7547
## BMI BMI 0.4409 0.5591
## KematianBalita KematianBalita 0.9856 0.0144
## Polio Polio 0.5773 0.4227
## TotPengeluaran TotPengeluaran 0.1335 0.8665
## Difteri Difteri 0.7390 0.2610
## HIVAIDS HIVAIDS 0.4232 0.5768
## GDP GDP 0.8755 0.1245
## Populasi Populasi 0.3026 0.6974
## Kurus1019 Kurus1019 0.8457 0.1543
## Kurus59 Kurus59 0.8378 0.1622
## KomposisiIncome KomposisiIncome 0.5487 0.4513
## Pendidikan Pendidikan 0.5999 0.4001
VARIMAX
fa_varimax <- fa(data_lengkap, nfactors = jumlah_faktor,
rotate = "varimax", fm = "pa", scores = "regression")
cat("\nLoading FA Varimax (cutoff 0.40)\n")
##
## Loading FA Varimax (cutoff 0.40)
print(fa_varimax$loadings, cutoff = 0.40)
##
## Loadings:
## PA1 PA2 PA4 PA3 PA5
## MortAlat 0.735
## KematianBayi 0.978
## Alkohol 0.530
## PctPengeluaran 0.902
## HepB 0.473
## Campak 0.473
## BMI 0.535
## KematianBalita 0.969
## Polio 0.717
## TotPengeluaran
## Difteri 0.827
## HIVAIDS 0.643
## GDP 0.900
## Populasi 0.545
## Kurus1019 -0.834
## Kurus59 -0.825
## KomposisiIncome 0.466
## Pendidikan 0.524
##
## PA1 PA2 PA4 PA3 PA5
## SS loadings 2.814 2.703 1.966 1.760 1.418
## Proportion Var 0.156 0.150 0.109 0.098 0.079
## Cumulative Var 0.156 0.306 0.416 0.514 0.592
cat("\nSemua Loading FA Varimax \n")
##
## Semua Loading FA Varimax
print(round(fa_varimax$loadings[], 3))
## PA1 PA2 PA4 PA3 PA5
## MortAlat -0.197 0.002 -0.153 -0.165 0.735
## KematianBayi -0.169 0.978 -0.031 -0.101 0.025
## Alkohol 0.530 0.020 0.256 0.160 -0.015
## PctPengeluaran 0.213 -0.028 0.902 0.024 -0.065
## HepB 0.029 -0.127 0.004 0.473 -0.041
## Campak -0.094 0.473 -0.028 -0.108 0.019
## BMI 0.535 -0.115 0.112 0.171 -0.316
## KematianBalita -0.170 0.969 -0.031 -0.122 0.045
## Polio 0.183 -0.071 0.075 0.717 -0.138
## TotPengeluaran 0.335 -0.060 0.085 0.098 0.031
## Difteri 0.180 -0.068 0.058 0.827 -0.120
## HIVAIDS -0.069 0.003 -0.014 -0.067 0.643
## GDP 0.203 -0.044 0.900 0.072 -0.131
## Populasi -0.062 0.545 0.008 -0.012 -0.035
## Kurus1019 -0.834 0.344 -0.029 0.005 0.178
## Kurus59 -0.825 0.349 -0.030 0.010 0.184
## KomposisiIncome 0.466 0.006 0.331 0.301 -0.362
## Pendidikan 0.524 -0.020 0.332 0.330 -0.324
cat("\nVarians FA Setelah Rotasi\n")
##
## Varians FA Setelah Rotasi
print(round(fa_varimax$Vaccounted, 3))
## PA1 PA2 PA4 PA3 PA5
## SS loadings 2.814 2.703 1.966 1.760 1.418
## Proportion Var 0.156 0.150 0.109 0.098 0.079
## Cumulative Var 0.156 0.306 0.416 0.514 0.592
## Proportion Explained 0.264 0.254 0.184 0.165 0.133
## Cumulative Proportion 0.264 0.517 0.702 0.867 1.000
SPECIFIC VARIANCE SETELAH ROTASI
print(data.frame(
Variabel = colnames(data_lengkap),
Psi = round(fa_varimax$uniquenesses, 4)
))
## Variabel Psi
## MortAlat MortAlat 0.3700
## KematianBayi KematianBayi 0.0035
## Alkohol Alkohol 0.6275
## PctPengeluaran PctPengeluaran 0.1349
## HepB HepB 0.7579
## Campak Campak 0.7547
## BMI BMI 0.5591
## KematianBalita KematianBalita 0.0144
## Polio Polio 0.4227
## TotPengeluaran TotPengeluaran 0.8665
## Difteri Difteri 0.2610
## HIVAIDS HIVAIDS 0.5768
## GDP GDP 0.1245
## Populasi Populasi 0.6974
## Kurus1019 Kurus1019 0.1543
## Kurus59 Kurus59 0.1622
## KomposisiIncome KomposisiIncome 0.4513
## Pendidikan Pendidikan 0.4001
DIAGRAM FAKTOR
fa.diagram(fa_varimax,
main = "Diagram Factor Analysis (Varimax)")
INTEPRETASI PER FACTOR
cat("\nInterpretasi Faktor (|loading| >= 0.40)\n")
##
## Interpretasi Faktor (|loading| >= 0.40)
L <- round(fa_varimax$loadings[], 3)
for (f in 1:jumlah_faktor) {
cat(sprintf("\nFaktor %d:\n", f))
idx <- abs(L[, f]) >= 0.40
print(sort(L[idx, f], decreasing = TRUE))
}
##
## Faktor 1:
## BMI Alkohol Pendidikan KomposisiIncome Kurus59
## 0.535 0.530 0.524 0.466 -0.825
## Kurus1019
## -0.834
##
## Faktor 2:
## KematianBayi KematianBalita Populasi Campak
## 0.978 0.969 0.545 0.473
##
## Faktor 3:
## PctPengeluaran GDP
## 0.902 0.900
##
## Faktor 4:
## Difteri Polio HepB
## 0.827 0.717 0.473
##
## Faktor 5:
## MortAlat HIVAIDS
## 0.735 0.643
SKOR FACTOR
skor_fa <- as.data.frame(fa_varimax$scores)
colnames(skor_fa) <- paste0("FA", 1:jumlah_faktor)
head(skor_fa)
PERBANDINGAN KOMUNALITAS PCA DAN FA
print(data.frame(
Variabel = colnames(data_lengkap),
h2_PCA = round(pca_unrot$communality, 3),
h2_FA = round(fa_unrot$communality, 3)
))
## Variabel h2_PCA h2_FA
## MortAlat MortAlat 0.681 0.630
## KematianBayi KematianBayi 0.904 0.996
## Alkohol Alkohol 0.573 0.372
## PctPengeluaran PctPengeluaran 0.904 0.865
## HepB HepB 0.529 0.242
## Campak Campak 0.398 0.245
## BMI BMI 0.553 0.441
## KematianBalita KematianBalita 0.900 0.986
## Polio Polio 0.700 0.577
## TotPengeluaran TotPengeluaran 0.405 0.134
## Difteri Difteri 0.766 0.739
## HIVAIDS HIVAIDS 0.714 0.423
## GDP GDP 0.917 0.876
## Populasi Populasi 0.499 0.303
## Kurus1019 Kurus1019 0.802 0.846
## Kurus59 Kurus59 0.801 0.838
## KomposisiIncome KomposisiIncome 0.624 0.549
## Pendidikan Pendidikan 0.667 0.600