Package

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)

Load Dataset

df <- read.csv("Stunting NTT Datasett - new.csv")

df_bersih <- df %>%
  select(
    Stunting = Prevalensi.Stunting...., 
    Miskin = Persentase.Penduduk.Miskin,
    HLS = Harapan.Lama.Sekolah,
    RLS = Rata.rata.Lamanya.Sekolah,
    IPM = Indeks.Pembangunan.Manusia..IPM.,
    Kepadatan = Kepadatan.Penduduk..Jiwa.km2.,
    Air_Layak = Air_Layak,
    Sanitasi = Sanitasi,
    Wasting = Persentase.Balita.Wasting,
    ASI = Diberi.Asi
  )

tabel_deskriptif <- describe(df_bersih)

print(tabel_deskriptif[, c("n", "mean", "sd", "median", "min", "max")])
##            n   mean     sd median   min     max
## Stunting  88  16.75   6.63  16.00  7.00   39.20
## Miskin    88  20.82   6.78  21.74  8.24   34.27
## HLS       88  13.14   0.87  13.00 12.24   16.54
## RLS       88   7.75   1.04   7.62  6.35   11.64
## IPM       88  65.32   4.09  64.84 57.03   81.08
## Kepadatan 88 251.69 552.92 121.00 35.21 2980.00
## Air_Layak 88  86.29  11.13  88.50 49.98   99.80
## Sanitasi  88  74.89  14.07  80.31 42.15   96.98
## Wasting   88   8.45   2.67   9.00  2.85   15.30
## ASI       88  96.06   3.63  96.78 83.75  100.00

Heatmap Korelasi

# Memanggil package
library(corrplot)
## corrplot 0.95 loaded
# 1. Menghitung nilai korelasi antar 10 variabel
matriks_korelasi <- cor(df_bersih)

# 2. Membuat visualisasi Heatmap
corrplot(matriks_korelasi, 
         method = "color",       # Bentuk visualisasi berupa warna
         type = "upper",         # Hanya menampilkan segitiga atas agar tidak dobel
         addCoef.col = "black",  # Menampilkan angka korelasi di dalam kotak
         tl.col = "black",       # Warna teks label (nama variabel)
         tl.srt = 45,            # Memiringkan teks label 45 derajat
         diag = FALSE,           # Menyembunyikan nilai korelasi dengan diri sendiri (nilai 1)
         number.cex = 0.7,       # Memperkecil ukuran font angka agar tidak bertabrakan
         title = "Matriks Korelasi Indikator Stunting NTT",
         mar = c(0,0,1,0))       # Mengatur margin judul

Membuat Boxplot Distribusi (Deteksi Outlier)

# Memanggil package
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(tidyr)

# 1. Menstandarisasi data (Z-score) agar skala seragam untuk visualisasi
df_scaled <- as.data.frame(scale(df_bersih))

# 2. Mengubah format data dari format tabel biasa (wide) ke format panjang (long)
df_long <- pivot_longer(df_scaled, 
                        cols = everything(),
                        names_to = "Variabel", 
                        values_to = "Nilai_Z")

# 3. Membuat Boxplot
ggplot(df_long, aes(x = Variabel, y = Nilai_Z, fill = Variabel)) +
  geom_boxplot(alpha = 0.7, 
               outlier.colour = "red",  # Mewarnai titik outlier dengan warna merah
               outlier.shape = 16, 
               outlier.size = 2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), # Memiringkan nama sumbu X
        legend.position = "none") +                        # Menyembunyikan legend box
  labs(title = "Distribusi dan Outlier Variabel (Data Terstandarisasi)",
       x = "Variabel Indikator",
       y = "Nilai Standar (Z-Score)")

Asumsi

Uji KMO (Kaiser-Meyer-Olkin)

# 1. Melakukan Uji KMO pada dataset yang sudah dibersihkan (10 variabel)
hasil_kmo <- KMO(df_bersih)

# 2. Menampilkan hasil
print("=== HASIL UJI KMO ===")
## [1] "=== HASIL UJI KMO ==="
print(hasil_kmo$MSA)  # MSA (Measure of Sampling Adequacy) adalah nilai KMO keseluruhan 
## [1] 0.6550379

Uji Bartlett (Bartlett’s Test of Sphericity)

# 1. Menghitung matriks korelasi terlebih dahulu
matriks_korelasi <- cor(df_bersih)

# 2. Melakukan Uji Bartlett
# n.obs adalah jumlah baris data kita (88 observasi)
hasil_bartlett <- cortest.bartlett(matriks_korelasi, n = nrow(df_bersih))

# 3. Menampilkan hasil
print("=== HASIL UJI BARTLETT ===")
## [1] "=== HASIL UJI BARTLETT ==="
print(hasil_bartlett)
## $chisq
## [1] 626.4588
## 
## $p.value
## [1] 1.909571e-103
## 
## $df
## [1] 45

PCA

Standarisasi Data (Z-Score)

# Menstandarisasi 10 variabel (Z-score)
df_scaled <- scale(df_bersih)

# Mengubahnya kembali menjadi bentuk data frame
df_scaled <- as.data.frame(df_scaled)

# Menampilkan 5 baris pertama dari data yang sudah distandarisasi
print("=== HASIL STANDARISASI DATA (5 Baris Pertama) ===")
## [1] "=== HASIL STANDARISASI DATA (5 Baris Pertama) ==="
head(round(df_scaled, 3))
##   Stunting Miskin    HLS    RLS    IPM Kepadatan Air_Layak Sanitasi Wasting
## 1    1.093  1.116 -0.020 -0.877 -0.364    -0.092     0.037   -1.462   0.059
## 2    0.339  1.306 -0.353 -0.416  0.103    -0.392    -0.599   -0.415  -1.475
## 3    0.791  0.318  0.830 -0.348 -0.222    -0.333    -0.151    0.002   0.956
## 4    2.300  0.858 -0.640 -0.973 -0.772    -0.246    -2.219   -1.759   1.065
## 5    1.244  0.265  0.210  0.200 -0.398    -0.277     0.095   -0.346  -0.125
## 6    0.188 -0.759 -0.985 -0.377 -0.623    -0.136     0.178   -0.220   1.252
##      ASI
## 1  0.516
## 2  0.340
## 3  0.152
## 4 -0.060
## 5  0.067
## 6  0.351

Membentuk Matriks Korelasi (Kovariansi)

# Menghitung matriks korelasi (Kovariansi)
matriks_korelasi <- cor(df_scaled)

# Menampilkan hasil matriks (dibulatkan 3 angka di belakang koma agar rapi)
print("=== MATRIKS KORELASI / KOVARIANSI ===")
## [1] "=== MATRIKS KORELASI / KOVARIANSI ==="
round(matriks_korelasi, 3)
##           Stunting Miskin    HLS    RLS    IPM Kepadatan Air_Layak Sanitasi
## Stunting     1.000  0.195  0.201 -0.166 -0.102     0.162    -0.184   -0.213
## Miskin       0.195  1.000 -0.249 -0.549 -0.536    -0.408    -0.702   -0.610
## HLS          0.201 -0.249  1.000  0.619  0.754     0.842     0.209    0.164
## RLS         -0.166 -0.549  0.619  1.000  0.845     0.785     0.473    0.554
## IPM         -0.102 -0.536  0.754  0.845  1.000     0.795     0.510    0.364
## Kepadatan    0.162 -0.408  0.842  0.785  0.795     1.000     0.210    0.204
## Air_Layak   -0.184 -0.702  0.209  0.473  0.510     0.210     1.000    0.587
## Sanitasi    -0.213 -0.610  0.164  0.554  0.364     0.204     0.587    1.000
## Wasting      0.404 -0.296  0.154  0.093  0.032     0.220     0.208    0.401
## ASI         -0.057  0.141 -0.031 -0.032  0.019    -0.092    -0.023   -0.174
##           Wasting    ASI
## Stunting    0.404 -0.057
## Miskin     -0.296  0.141
## HLS         0.154 -0.031
## RLS         0.093 -0.032
## IPM         0.032  0.019
## Kepadatan   0.220 -0.092
## Air_Layak   0.208 -0.023
## Sanitasi    0.401 -0.174
## Wasting     1.000 -0.291
## ASI        -0.291  1.000

Menghitung Eigenvalue dan Eigenvector

# Menghitung Eigenvalue dan Eigenvector dari Matriks Korelasi
hasil_eigen <- eigen(matriks_korelasi)

# Memisahkan Eigenvalue dan Eigenvector
eigenvalues <- hasil_eigen$values
eigenvectors <- hasil_eigen$vectors

# Menampilkan Eigenvalue
print("=== EIGENVALUE (Akar Laten) DARI MASING-MASING KOMPONEN ===")
## [1] "=== EIGENVALUE (Akar Laten) DARI MASING-MASING KOMPONEN ==="
round(eigenvalues, 4)
##  [1] 4.2928 1.7505 1.5770 0.8793 0.5080 0.3490 0.2899 0.2010 0.1075 0.0449
# Menampilkan sebagian Eigenvector (Untuk PC1 sampai PC3 saja agar tidak kepanjangan)
print("=== EIGENVECTOR (Bobot PC1, PC2, PC3) ===")
## [1] "=== EIGENVECTOR (Bobot PC1, PC2, PC3) ==="
round(eigenvectors[, 1:3], 4)
##          [,1]    [,2]    [,3]
##  [1,]  0.0342 -0.4393  0.4977
##  [2,]  0.3602 -0.3303 -0.0661
##  [3,] -0.3478 -0.4389 -0.0377
##  [4,] -0.4330 -0.0533 -0.1696
##  [5,] -0.4293 -0.1573 -0.2179
##  [6,] -0.3900 -0.3892 -0.0148
##  [7,] -0.3193  0.3822 -0.0014
##  [8,] -0.3094  0.4139  0.1665
##  [9,] -0.1503  0.0368  0.6668
## [10,]  0.0625 -0.0863 -0.4446

Menghitung Proporsi Varians

# 1. Menghitung persentase varians dari masing-masing komponen
proporsi_varians <- (eigenvalues / sum(eigenvalues)) * 100

# 2. Menghitung varians kumulatif (gabungan)
varians_kumulatif <- cumsum(proporsi_varians)

# 3. Membuat Tabel Ringkasan (Tabel Total Variance Explained)
tabel_varians <- data.frame(
  Komponen = paste0("PC", 1:length(eigenvalues)),
  Eigenvalue = round(eigenvalues, 3),
  Proporsi_Varians_Persen = round(proporsi_varians, 2),
  Varians_Kumulatif_Persen = round(varians_kumulatif, 2)
)

# Menampilkan tabel
print("=== TABEL TOTAL VARIANCE EXPLAINED ===")
## [1] "=== TABEL TOTAL VARIANCE EXPLAINED ==="
print(tabel_varians)
##    Komponen Eigenvalue Proporsi_Varians_Persen Varians_Kumulatif_Persen
## 1       PC1      4.293                   42.93                    42.93
## 2       PC2      1.751                   17.51                    60.43
## 3       PC3      1.577                   15.77                    76.20
## 4       PC4      0.879                    8.79                    85.00
## 5       PC5      0.508                    5.08                    90.08
## 6       PC6      0.349                    3.49                    93.57
## 7       PC7      0.290                    2.90                    96.47
## 8       PC8      0.201                    2.01                    98.48
## 9       PC9      0.108                    1.08                    99.55
## 10     PC10      0.045                    0.45                   100.00

Membuat Scree Plot (Visualisasi Pemilihan Komponen)

# Mengaktifkan library ggplot2 jika belum
library(ggplot2)

# Membuat Scree Plot menggunakan data tabel_varians sebelumnya
ggplot(data = tabel_varians, aes(x = 1:nrow(tabel_varians), y = Eigenvalue)) +
  geom_line(color = "blue", size = 1) +
  geom_point(color = "red", size = 3) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "darkgreen", size = 1) + # Garis batas Kaiser (Eigenvalue = 1)
  scale_x_continuous(breaks = 1:nrow(tabel_varians), labels = tabel_varians$Komponen) +
  labs(title = "Scree Plot: Eigenvalue vs Komponen Utama",
       x = "Komponen Utama (Principal Component)",
       y = "Eigenvalue") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Factor Analysis

Factor Analysis dengan Rotasi Varimax

# Memastikan package psych sudah aktif
library(psych)

# Melakukan Factor Analysis (Ekstraksi PCA dengan Rotasi Varimax)
# Menggunakan df_scaled (data yang sudah distandarisasi)
hasil_fa <- principal(df_scaled, nfactors = 3, rotate = "varimax")

# Menampilkan hasil komputasi dasar
print("=== HASIL FACTOR ANALYSIS (VARIMAX ROTATION) ===")
## [1] "=== HASIL FACTOR ANALYSIS (VARIMAX ROTATION) ==="
print(hasil_fa)
## Principal Components Analysis
## Call: principal(r = df_scaled, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##             RC1   RC2   RC3   h2    u2 com
## Stunting   0.20 -0.50  0.66 0.73 0.266 2.1
## Miskin    -0.29 -0.81 -0.14 0.75 0.245 1.3
## HLS        0.92  0.00  0.11 0.86 0.141 1.0
## RLS        0.77  0.51 -0.08 0.86 0.145 1.8
## IPM        0.86  0.40 -0.13 0.91 0.091 1.5
## Kepadatan  0.94  0.10  0.14 0.92 0.082 1.1
## Air_Layak  0.20  0.81  0.04 0.69 0.307 1.1
## Sanitasi   0.12  0.83  0.24 0.75 0.246 1.2
## Wasting    0.07  0.22  0.86 0.80 0.199 1.1
## ASI        0.06 -0.16 -0.56 0.34 0.658 1.2
## 
##                        RC1  RC2  RC3
## SS loadings           3.25 2.74 1.63
## Proportion Var        0.32 0.27 0.16
## Cumulative Var        0.32 0.60 0.76
## Proportion Explained  0.43 0.36 0.21
## Cumulative Proportion 0.43 0.79 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.05 
##  with the empirical chi square  20.97  with prob <  0.28 
## 
## Fit based upon off diagonal values = 0.98

Matriks Loading

# Menampilkan Matriks Loading Faktor yang sudah bersih
# cutoff = 0.4 berarti kita hanya melihat korelasi yang kuat saja (> 0.4 atau < -0.4)
# sort = TRUE agar variabel yang masuk ke kelompok yang sama dijejerkan berdekatan
print("=== ROTATED FACTOR MATRIX (MATRIKS LOADING BERSIH) ===")
## [1] "=== ROTATED FACTOR MATRIX (MATRIKS LOADING BERSIH) ==="
print(hasil_fa$loadings, cutoff = 0.4, sort = TRUE)
## 
## Loadings:
##           RC1    RC2    RC3   
## HLS        0.920              
## RLS        0.769  0.508       
## IPM        0.858              
## Kepadatan  0.942              
## Miskin           -0.807       
## Air_Layak         0.808       
## Sanitasi          0.826       
## Stunting         -0.504  0.663
## Wasting                  0.863
## ASI                     -0.557
## 
##                  RC1   RC2   RC3
## SS loadings    3.246 2.744 1.630
## Proportion Var 0.325 0.274 0.163
## Cumulative Var 0.325 0.599 0.762

Communalities (Proporsi Varians per Variabel)

# Mengambil nilai communalities (h2)
nilai_communalities <- hasil_fa$communality

# Membuat tabel yang rapi
tabel_communalities <- data.frame(
  Variabel = names(nilai_communalities),
  Communalities = round(nilai_communalities, 3)
)

print("=== TABEL COMMUNALITIES ===")
## [1] "=== TABEL COMMUNALITIES ==="
print(tabel_communalities)
##            Variabel Communalities
## Stunting   Stunting         0.734
## Miskin       Miskin         0.755
## HLS             HLS         0.859
## RLS             RLS         0.855
## IPM             IPM         0.909
## Kepadatan Kepadatan         0.918
## Air_Layak Air_Layak         0.693
## Sanitasi   Sanitasi         0.754
## Wasting     Wasting         0.801
## ASI             ASI         0.342