Package
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
Load Dataset
df <- read.csv("Stunting NTT Datasett - new.csv")
df_bersih <- df %>%
select(
Stunting = Prevalensi.Stunting....,
Miskin = Persentase.Penduduk.Miskin,
HLS = Harapan.Lama.Sekolah,
RLS = Rata.rata.Lamanya.Sekolah,
IPM = Indeks.Pembangunan.Manusia..IPM.,
Kepadatan = Kepadatan.Penduduk..Jiwa.km2.,
Air_Layak = Air_Layak,
Sanitasi = Sanitasi,
Wasting = Persentase.Balita.Wasting,
ASI = Diberi.Asi
)
tabel_deskriptif <- describe(df_bersih)
print(tabel_deskriptif[, c("n", "mean", "sd", "median", "min", "max")])
## n mean sd median min max
## Stunting 88 16.75 6.63 16.00 7.00 39.20
## Miskin 88 20.82 6.78 21.74 8.24 34.27
## HLS 88 13.14 0.87 13.00 12.24 16.54
## RLS 88 7.75 1.04 7.62 6.35 11.64
## IPM 88 65.32 4.09 64.84 57.03 81.08
## Kepadatan 88 251.69 552.92 121.00 35.21 2980.00
## Air_Layak 88 86.29 11.13 88.50 49.98 99.80
## Sanitasi 88 74.89 14.07 80.31 42.15 96.98
## Wasting 88 8.45 2.67 9.00 2.85 15.30
## ASI 88 96.06 3.63 96.78 83.75 100.00
Heatmap Korelasi
# Memanggil package
library(corrplot)
## corrplot 0.95 loaded
# 1. Menghitung nilai korelasi antar 10 variabel
matriks_korelasi <- cor(df_bersih)
# 2. Membuat visualisasi Heatmap
corrplot(matriks_korelasi,
method = "color", # Bentuk visualisasi berupa warna
type = "upper", # Hanya menampilkan segitiga atas agar tidak dobel
addCoef.col = "black", # Menampilkan angka korelasi di dalam kotak
tl.col = "black", # Warna teks label (nama variabel)
tl.srt = 45, # Memiringkan teks label 45 derajat
diag = FALSE, # Menyembunyikan nilai korelasi dengan diri sendiri (nilai 1)
number.cex = 0.7, # Memperkecil ukuran font angka agar tidak bertabrakan
title = "Matriks Korelasi Indikator Stunting NTT",
mar = c(0,0,1,0)) # Mengatur margin judul

Membuat Boxplot Distribusi (Deteksi Outlier)
# Memanggil package
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(tidyr)
# 1. Menstandarisasi data (Z-score) agar skala seragam untuk visualisasi
df_scaled <- as.data.frame(scale(df_bersih))
# 2. Mengubah format data dari format tabel biasa (wide) ke format panjang (long)
df_long <- pivot_longer(df_scaled,
cols = everything(),
names_to = "Variabel",
values_to = "Nilai_Z")
# 3. Membuat Boxplot
ggplot(df_long, aes(x = Variabel, y = Nilai_Z, fill = Variabel)) +
geom_boxplot(alpha = 0.7,
outlier.colour = "red", # Mewarnai titik outlier dengan warna merah
outlier.shape = 16,
outlier.size = 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), # Memiringkan nama sumbu X
legend.position = "none") + # Menyembunyikan legend box
labs(title = "Distribusi dan Outlier Variabel (Data Terstandarisasi)",
x = "Variabel Indikator",
y = "Nilai Standar (Z-Score)")

Asumsi
Uji KMO (Kaiser-Meyer-Olkin)
# 1. Melakukan Uji KMO pada dataset yang sudah dibersihkan (10 variabel)
hasil_kmo <- KMO(df_bersih)
# 2. Menampilkan hasil
print("=== HASIL UJI KMO ===")
## [1] "=== HASIL UJI KMO ==="
print(hasil_kmo$MSA) # MSA (Measure of Sampling Adequacy) adalah nilai KMO keseluruhan
## [1] 0.6550379
Uji Bartlett (Bartlett’s Test of Sphericity)
# 1. Menghitung matriks korelasi terlebih dahulu
matriks_korelasi <- cor(df_bersih)
# 2. Melakukan Uji Bartlett
# n.obs adalah jumlah baris data kita (88 observasi)
hasil_bartlett <- cortest.bartlett(matriks_korelasi, n = nrow(df_bersih))
# 3. Menampilkan hasil
print("=== HASIL UJI BARTLETT ===")
## [1] "=== HASIL UJI BARTLETT ==="
print(hasil_bartlett)
## $chisq
## [1] 626.4588
##
## $p.value
## [1] 1.909571e-103
##
## $df
## [1] 45
PCA
Standarisasi Data (Z-Score)
# Menstandarisasi 10 variabel (Z-score)
df_scaled <- scale(df_bersih)
# Mengubahnya kembali menjadi bentuk data frame
df_scaled <- as.data.frame(df_scaled)
# Menampilkan 5 baris pertama dari data yang sudah distandarisasi
print("=== HASIL STANDARISASI DATA (5 Baris Pertama) ===")
## [1] "=== HASIL STANDARISASI DATA (5 Baris Pertama) ==="
head(round(df_scaled, 3))
## Stunting Miskin HLS RLS IPM Kepadatan Air_Layak Sanitasi Wasting
## 1 1.093 1.116 -0.020 -0.877 -0.364 -0.092 0.037 -1.462 0.059
## 2 0.339 1.306 -0.353 -0.416 0.103 -0.392 -0.599 -0.415 -1.475
## 3 0.791 0.318 0.830 -0.348 -0.222 -0.333 -0.151 0.002 0.956
## 4 2.300 0.858 -0.640 -0.973 -0.772 -0.246 -2.219 -1.759 1.065
## 5 1.244 0.265 0.210 0.200 -0.398 -0.277 0.095 -0.346 -0.125
## 6 0.188 -0.759 -0.985 -0.377 -0.623 -0.136 0.178 -0.220 1.252
## ASI
## 1 0.516
## 2 0.340
## 3 0.152
## 4 -0.060
## 5 0.067
## 6 0.351
Membentuk Matriks Korelasi (Kovariansi)
# Menghitung matriks korelasi (Kovariansi)
matriks_korelasi <- cor(df_scaled)
# Menampilkan hasil matriks (dibulatkan 3 angka di belakang koma agar rapi)
print("=== MATRIKS KORELASI / KOVARIANSI ===")
## [1] "=== MATRIKS KORELASI / KOVARIANSI ==="
round(matriks_korelasi, 3)
## Stunting Miskin HLS RLS IPM Kepadatan Air_Layak Sanitasi
## Stunting 1.000 0.195 0.201 -0.166 -0.102 0.162 -0.184 -0.213
## Miskin 0.195 1.000 -0.249 -0.549 -0.536 -0.408 -0.702 -0.610
## HLS 0.201 -0.249 1.000 0.619 0.754 0.842 0.209 0.164
## RLS -0.166 -0.549 0.619 1.000 0.845 0.785 0.473 0.554
## IPM -0.102 -0.536 0.754 0.845 1.000 0.795 0.510 0.364
## Kepadatan 0.162 -0.408 0.842 0.785 0.795 1.000 0.210 0.204
## Air_Layak -0.184 -0.702 0.209 0.473 0.510 0.210 1.000 0.587
## Sanitasi -0.213 -0.610 0.164 0.554 0.364 0.204 0.587 1.000
## Wasting 0.404 -0.296 0.154 0.093 0.032 0.220 0.208 0.401
## ASI -0.057 0.141 -0.031 -0.032 0.019 -0.092 -0.023 -0.174
## Wasting ASI
## Stunting 0.404 -0.057
## Miskin -0.296 0.141
## HLS 0.154 -0.031
## RLS 0.093 -0.032
## IPM 0.032 0.019
## Kepadatan 0.220 -0.092
## Air_Layak 0.208 -0.023
## Sanitasi 0.401 -0.174
## Wasting 1.000 -0.291
## ASI -0.291 1.000
Menghitung Eigenvalue dan Eigenvector
# Menghitung Eigenvalue dan Eigenvector dari Matriks Korelasi
hasil_eigen <- eigen(matriks_korelasi)
# Memisahkan Eigenvalue dan Eigenvector
eigenvalues <- hasil_eigen$values
eigenvectors <- hasil_eigen$vectors
# Menampilkan Eigenvalue
print("=== EIGENVALUE (Akar Laten) DARI MASING-MASING KOMPONEN ===")
## [1] "=== EIGENVALUE (Akar Laten) DARI MASING-MASING KOMPONEN ==="
round(eigenvalues, 4)
## [1] 4.2928 1.7505 1.5770 0.8793 0.5080 0.3490 0.2899 0.2010 0.1075 0.0449
# Menampilkan sebagian Eigenvector (Untuk PC1 sampai PC3 saja agar tidak kepanjangan)
print("=== EIGENVECTOR (Bobot PC1, PC2, PC3) ===")
## [1] "=== EIGENVECTOR (Bobot PC1, PC2, PC3) ==="
round(eigenvectors[, 1:3], 4)
## [,1] [,2] [,3]
## [1,] 0.0342 -0.4393 0.4977
## [2,] 0.3602 -0.3303 -0.0661
## [3,] -0.3478 -0.4389 -0.0377
## [4,] -0.4330 -0.0533 -0.1696
## [5,] -0.4293 -0.1573 -0.2179
## [6,] -0.3900 -0.3892 -0.0148
## [7,] -0.3193 0.3822 -0.0014
## [8,] -0.3094 0.4139 0.1665
## [9,] -0.1503 0.0368 0.6668
## [10,] 0.0625 -0.0863 -0.4446
Menghitung Proporsi Varians
# 1. Menghitung persentase varians dari masing-masing komponen
proporsi_varians <- (eigenvalues / sum(eigenvalues)) * 100
# 2. Menghitung varians kumulatif (gabungan)
varians_kumulatif <- cumsum(proporsi_varians)
# 3. Membuat Tabel Ringkasan (Tabel Total Variance Explained)
tabel_varians <- data.frame(
Komponen = paste0("PC", 1:length(eigenvalues)),
Eigenvalue = round(eigenvalues, 3),
Proporsi_Varians_Persen = round(proporsi_varians, 2),
Varians_Kumulatif_Persen = round(varians_kumulatif, 2)
)
# Menampilkan tabel
print("=== TABEL TOTAL VARIANCE EXPLAINED ===")
## [1] "=== TABEL TOTAL VARIANCE EXPLAINED ==="
print(tabel_varians)
## Komponen Eigenvalue Proporsi_Varians_Persen Varians_Kumulatif_Persen
## 1 PC1 4.293 42.93 42.93
## 2 PC2 1.751 17.51 60.43
## 3 PC3 1.577 15.77 76.20
## 4 PC4 0.879 8.79 85.00
## 5 PC5 0.508 5.08 90.08
## 6 PC6 0.349 3.49 93.57
## 7 PC7 0.290 2.90 96.47
## 8 PC8 0.201 2.01 98.48
## 9 PC9 0.108 1.08 99.55
## 10 PC10 0.045 0.45 100.00
Membuat Scree Plot (Visualisasi Pemilihan Komponen)
# Mengaktifkan library ggplot2 jika belum
library(ggplot2)
# Membuat Scree Plot menggunakan data tabel_varians sebelumnya
ggplot(data = tabel_varians, aes(x = 1:nrow(tabel_varians), y = Eigenvalue)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 3) +
geom_hline(yintercept = 1, linetype = "dashed", color = "darkgreen", size = 1) + # Garis batas Kaiser (Eigenvalue = 1)
scale_x_continuous(breaks = 1:nrow(tabel_varians), labels = tabel_varians$Komponen) +
labs(title = "Scree Plot: Eigenvalue vs Komponen Utama",
x = "Komponen Utama (Principal Component)",
y = "Eigenvalue") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Factor Analysis
Factor Analysis dengan Rotasi Varimax
# Memastikan package psych sudah aktif
library(psych)
# Melakukan Factor Analysis (Ekstraksi PCA dengan Rotasi Varimax)
# Menggunakan df_scaled (data yang sudah distandarisasi)
hasil_fa <- principal(df_scaled, nfactors = 3, rotate = "varimax")
# Menampilkan hasil komputasi dasar
print("=== HASIL FACTOR ANALYSIS (VARIMAX ROTATION) ===")
## [1] "=== HASIL FACTOR ANALYSIS (VARIMAX ROTATION) ==="
print(hasil_fa)
## Principal Components Analysis
## Call: principal(r = df_scaled, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## Stunting 0.20 -0.50 0.66 0.73 0.266 2.1
## Miskin -0.29 -0.81 -0.14 0.75 0.245 1.3
## HLS 0.92 0.00 0.11 0.86 0.141 1.0
## RLS 0.77 0.51 -0.08 0.86 0.145 1.8
## IPM 0.86 0.40 -0.13 0.91 0.091 1.5
## Kepadatan 0.94 0.10 0.14 0.92 0.082 1.1
## Air_Layak 0.20 0.81 0.04 0.69 0.307 1.1
## Sanitasi 0.12 0.83 0.24 0.75 0.246 1.2
## Wasting 0.07 0.22 0.86 0.80 0.199 1.1
## ASI 0.06 -0.16 -0.56 0.34 0.658 1.2
##
## RC1 RC2 RC3
## SS loadings 3.25 2.74 1.63
## Proportion Var 0.32 0.27 0.16
## Cumulative Var 0.32 0.60 0.76
## Proportion Explained 0.43 0.36 0.21
## Cumulative Proportion 0.43 0.79 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 20.97 with prob < 0.28
##
## Fit based upon off diagonal values = 0.98
Matriks Loading
# Menampilkan Matriks Loading Faktor yang sudah bersih
# cutoff = 0.4 berarti kita hanya melihat korelasi yang kuat saja (> 0.4 atau < -0.4)
# sort = TRUE agar variabel yang masuk ke kelompok yang sama dijejerkan berdekatan
print("=== ROTATED FACTOR MATRIX (MATRIKS LOADING BERSIH) ===")
## [1] "=== ROTATED FACTOR MATRIX (MATRIKS LOADING BERSIH) ==="
print(hasil_fa$loadings, cutoff = 0.4, sort = TRUE)
##
## Loadings:
## RC1 RC2 RC3
## HLS 0.920
## RLS 0.769 0.508
## IPM 0.858
## Kepadatan 0.942
## Miskin -0.807
## Air_Layak 0.808
## Sanitasi 0.826
## Stunting -0.504 0.663
## Wasting 0.863
## ASI -0.557
##
## RC1 RC2 RC3
## SS loadings 3.246 2.744 1.630
## Proportion Var 0.325 0.274 0.163
## Cumulative Var 0.325 0.599 0.762
Communalities (Proporsi Varians per Variabel)
# Mengambil nilai communalities (h2)
nilai_communalities <- hasil_fa$communality
# Membuat tabel yang rapi
tabel_communalities <- data.frame(
Variabel = names(nilai_communalities),
Communalities = round(nilai_communalities, 3)
)
print("=== TABEL COMMUNALITIES ===")
## [1] "=== TABEL COMMUNALITIES ==="
print(tabel_communalities)
## Variabel Communalities
## Stunting Stunting 0.734
## Miskin Miskin 0.755
## HLS HLS 0.859
## RLS RLS 0.855
## IPM IPM 0.909
## Kepadatan Kepadatan 0.918
## Air_Layak Air_Layak 0.693
## Sanitasi Sanitasi 0.754
## Wasting Wasting 0.801
## ASI ASI 0.342