1. Pendahuluan

Analisis ini bertujuan untuk melakukan preprocessing, analisis deskriptif, dan analisis multivariat pada data SUSENAS 2019. Dataset terdiri atas 340.032 observasi dan 18 variabel yang memuat informasi pengeluaran rumah tangga, konsumsi, dan variabel terkait lainnya.

2. Preprocessing Data

2.1 Muat Dataset

library(foreign)
library(dplyr)

# Read the raw table from the dBase file blok43.dbf.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact location exists.
susenas_raw <- foreign::read.dbf(file = "D:/Nomor 6/blok43.dbf")

# Print the structure (dimensions, column names, column types) of the raw data.
str(object = susenas_raw)
## 'data.frame':    340032 obs. of  18 variables:
##  $ RENUM     : int  285340 285346 285337 285334 285331 285319 285322 285325 285343 285328 ...
##  $ R101      : int  11 11 11 11 11 11 11 11 11 11 ...
##  $ R102      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ R105      : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ R203      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ R301      : int  4 4 3 2 2 1 5 4 4 3 ...
##  $ FOOD      : num  1795114 2108331 1810200 1561971 1178940 ...
##  $ NONFOOD   : num  1183000 868198 1074350 790975 778892 ...
##  $ EXPEND    : num  2978114 2976530 2884550 2352946 1957832 ...
##  $ KAPITA    : num  744529 744132 961517 1176473 978916 ...
##  $ KALORI_KAP: num  2436 2451 2496 3385 3555 ...
##  $ PROTE_KAP : num  63.9 78.4 74.5 109.2 105.3 ...
##  $ LEMAK_KAP : num  49.3 48.2 45.3 82.6 59 ...
##  $ KARBO_KAP : num  397 404 419 506 612 ...
##  $ WERT      : num  35.2 36.6 35.5 35 31 ...
##  $ WEIND     : num  140.8 146.6 106.6 70 61.9 ...
##  $ WI1       : int  9976 9976 9976 9976 9976 9976 9976 9976 9976 9976 ...
##  $ WI2       : int  177146 60810 99379 141157 123223 154278 90478 206467 24522 279725 ...
##  - attr(*, "data_types")= chr [1:18] "N" "N" "N" "N" ...

2.2 Pembersihan Data

# Columns kept for the analysis.
selected_vars <- c(
  "RENUM", "R101", "R102", "R105", "R203", "R301",
  "FOOD", "NONFOOD", "EXPEND", "KAPITA",
  "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP",
  "WERT", "WEIND", "WI1", "WI2"
)

# Keep only those columns, then drop every row that contains an NA.
susenas_cleaned <- susenas_raw %>%
  select(all_of(selected_vars)) %>%
  na.omit()

# Show a per-column summary of the cleaned data.
summary(susenas_cleaned)
##      RENUM             R101            R102            R105            R203  
##  Min.   :     1   Min.   :11.00   Min.   : 1.00   Min.   :1.000   Min.   :1  
##  1st Qu.: 85009   1st Qu.:18.00   1st Qu.: 4.00   1st Qu.:1.000   1st Qu.:1  
##  Median :170017   Median :35.00   Median :10.00   Median :2.000   Median :1  
##  Mean   :170017   Mean   :43.05   Mean   :21.68   Mean   :1.579   Mean   :1  
##  3rd Qu.:255024   3rd Qu.:64.00   3rd Qu.:23.00   3rd Qu.:2.000   3rd Qu.:1  
##  Max.   :340032   Max.   :94.00   Max.   :79.00   Max.   :2.000   Max.   :1  
##       R301             FOOD             NONFOOD              EXPEND         
##  Min.   : 1.000   Min.   :  114857   Min.   :    38208   Min.   :   182190  
##  1st Qu.: 3.000   1st Qu.: 1295486   1st Qu.:   857667   1st Qu.:  2277443  
##  Median : 4.000   Median : 1916079   Median :  1403417   Median :  3429452  
##  Mean   : 3.757   Mean   : 2226646   Mean   :  2142186   Mean   :  4368832  
##  3rd Qu.: 5.000   3rd Qu.: 2785714   3rd Qu.:  2393183   3rd Qu.:  5212515  
##  Max.   :29.000   Max.   :31272857   Max.   :193333898   Max.   :201254112  
##      KAPITA           KALORI_KAP     PROTE_KAP         LEMAK_KAP      
##  Min.   :  114515   Min.   :1000   Min.   :  4.166   Min.   :  2.023  
##  1st Qu.:  656004   1st Qu.:1737   1st Qu.: 47.371   1st Qu.: 38.230  
##  Median :  997299   Median :2116   Median : 59.678   Median : 51.136  
##  Mean   : 1308460   Mean   :2217   Mean   : 64.088   Mean   : 55.374  
##  3rd Qu.: 1543848   3rd Qu.:2580   3rd Qu.: 75.468   3rd Qu.: 67.453  
##  Max.   :94740858   Max.   :4500   Max.   :364.666   Max.   :293.561  
##    KARBO_KAP            WERT              WEIND                WI1       
##  Min.   :  25.66   Min.   :   1.165   Min.   :    1.165   Min.   :    1  
##  1st Qu.: 254.84   1st Qu.:  67.080   1st Qu.:  212.398   1st Qu.: 7180  
##  Median : 312.18   Median : 141.845   Median :  474.874   Median :15780  
##  Mean   : 327.74   Mean   : 222.376   Mean   :  798.704   Mean   :15840  
##  3rd Qu.: 382.61   3rd Qu.: 296.702   3rd Qu.: 1011.605   3rd Qu.:24378  
##  Max.   :1042.51   Max.   :2082.520   Max.   :22907.723   Max.   :32974  
##       WI2        
##  Min.   :     1  
##  1st Qu.: 71016  
##  Median :156026  
##  Mean   :156601  
##  3rd Qu.:241034  
##  Max.   :326043

3. Analisis Deskriptif

3.1 Statistik Deskriptif

# Descriptive statistics for every numeric column of the cleaned data.
susenas_cleaned %>%
  select(where(is.numeric)) %>%
  summary()
##      RENUM             R101            R102            R105            R203  
##  Min.   :     1   Min.   :11.00   Min.   : 1.00   Min.   :1.000   Min.   :1  
##  1st Qu.: 85009   1st Qu.:18.00   1st Qu.: 4.00   1st Qu.:1.000   1st Qu.:1  
##  Median :170017   Median :35.00   Median :10.00   Median :2.000   Median :1  
##  Mean   :170017   Mean   :43.05   Mean   :21.68   Mean   :1.579   Mean   :1  
##  3rd Qu.:255024   3rd Qu.:64.00   3rd Qu.:23.00   3rd Qu.:2.000   3rd Qu.:1  
##  Max.   :340032   Max.   :94.00   Max.   :79.00   Max.   :2.000   Max.   :1  
##       R301             FOOD             NONFOOD              EXPEND         
##  Min.   : 1.000   Min.   :  114857   Min.   :    38208   Min.   :   182190  
##  1st Qu.: 3.000   1st Qu.: 1295486   1st Qu.:   857667   1st Qu.:  2277443  
##  Median : 4.000   Median : 1916079   Median :  1403417   Median :  3429452  
##  Mean   : 3.757   Mean   : 2226646   Mean   :  2142186   Mean   :  4368832  
##  3rd Qu.: 5.000   3rd Qu.: 2785714   3rd Qu.:  2393183   3rd Qu.:  5212515  
##  Max.   :29.000   Max.   :31272857   Max.   :193333898   Max.   :201254112  
##      KAPITA           KALORI_KAP     PROTE_KAP         LEMAK_KAP      
##  Min.   :  114515   Min.   :1000   Min.   :  4.166   Min.   :  2.023  
##  1st Qu.:  656004   1st Qu.:1737   1st Qu.: 47.371   1st Qu.: 38.230  
##  Median :  997299   Median :2116   Median : 59.678   Median : 51.136  
##  Mean   : 1308460   Mean   :2217   Mean   : 64.088   Mean   : 55.374  
##  3rd Qu.: 1543848   3rd Qu.:2580   3rd Qu.: 75.468   3rd Qu.: 67.453  
##  Max.   :94740858   Max.   :4500   Max.   :364.666   Max.   :293.561  
##    KARBO_KAP            WERT              WEIND                WI1       
##  Min.   :  25.66   Min.   :   1.165   Min.   :    1.165   Min.   :    1  
##  1st Qu.: 254.84   1st Qu.:  67.080   1st Qu.:  212.398   1st Qu.: 7180  
##  Median : 312.18   Median : 141.845   Median :  474.874   Median :15780  
##  Mean   : 327.74   Mean   : 222.376   Mean   :  798.704   Mean   :15840  
##  3rd Qu.: 382.61   3rd Qu.: 296.702   3rd Qu.: 1011.605   3rd Qu.:24378  
##  Max.   :1042.51   Max.   :2082.520   Max.   :22907.723   Max.   :32974  
##       WI2        
##  Min.   :     1  
##  1st Qu.: 71016  
##  Median :156026  
##  Mean   :156601  
##  3rd Qu.:241034  
##  Max.   :326043
# Frequency table of a column treated as categorical (example: R203).
# In the output recorded below, R203 takes only the value 1.
table(susenas_cleaned[["R203"]])
## 
##      1 
## 340032

3.2 Visualisasi Deskriptif

library(ggplot2)

# Histogram of the FOOD column, using bins 500000 units wide.
ggplot(data = susenas_cleaned, mapping = aes(x = FOOD)) +
  geom_histogram(binwidth = 500000, fill = "blue", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Distribusi Pengeluaran Makanan",
    x = "Pengeluaran Makanan",
    y = "Frekuensi"
  )

4. Analisis Multivariat: K-Means Clustering

4.1 Normalisasi Data

# Numeric columns used as clustering features.
# NOTE(review): in the str() sample above, EXPEND looks like FOOD + NONFOOD,
# so total expenditure may be counted twice in the distance -- confirm this
# is intended.
var_cluster <- c(
  "FOOD", "NONFOOD", "EXPEND", "KAPITA",
  "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP"
)
cluster_data <- susenas_cleaned[var_cluster]

# Standardise each feature (subtract the column mean, divide by the column
# standard deviation) so that columns on large scales do not dominate.
cluster_data_normalized <- scale(cluster_data, center = TRUE, scale = TRUE)

4.2 Penentuan Jumlah Cluster Optimal

# Fix the RNG seed so the random k-means starts below are reproducible.
set.seed(123)

# Candidate numbers of clusters to evaluate; shared by the computation and
# the plot so the two cannot drift apart.
k_values <- seq_len(10)

# Total within-cluster sum of squares for each candidate k.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# numeric vector with exactly one value per k.
# NOTE(review): kmeans() is run with its default iteration limit; check the
# console for non-convergence warnings on data of this size.
wss <- vapply(
  k_values,
  function(k) {
    kmeans(cluster_data_normalized, centers = k, nstart = 25)$tot.withinss
  },
  numeric(1)
)

# Elbow plot: look for the k after which the decrease in WSS flattens out.
plot(k_values, wss, type = "b", pch = 19, frame = FALSE,
     xlab = "Number of Clusters (k)",
     ylab = "Total Within-Cluster Sum of Squares",
     main = "Elbow Method for Optimal k")

4.3 Clustering dan Visualisasi

library(factoextra)

# Fit k-means with 3 clusters, using 25 random starts.
# NOTE(review): no seed is set here; the result depends on the RNG state left
# by the code that ran before this point.
kmeans_result <- kmeans(
  x = cluster_data_normalized,
  centers = 3,
  nstart = 25
)

# Store each row's cluster label on the cleaned data as a factor.
susenas_cleaned[["Cluster"]] <- as.factor(kmeans_result$cluster)

# Plot the clusters as points with a convex hull drawn around each cluster.
fviz_cluster(
  object = kmeans_result,
  data = cluster_data_normalized,
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  ggtheme = theme_minimal()
) +
  labs(title = "K-Means Clustering")

4.4 Analisis Hasil

# Number of rows assigned to each cluster.
table(susenas_cleaned[["Cluster"]])
## 
##      1      2      3 
## 101550  16406 222076
# Mean of every clustering feature (original, unscaled values) per cluster.
aggregate(
  x = cluster_data,
  by = list(Cluster = susenas_cleaned$Cluster),
  FUN = mean
)
##   Cluster    FOOD  NONFOOD   EXPEND    KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## 1       1 2464382  1974974  4439356 1693360.9   2976.117  88.59248  78.14801
## 2       2 5517235 10824287 16341522 4352082.1   2523.204  82.54083  74.04720
## 3       3 1874841  1577253  3452094  907604.3   1847.794  51.51926  43.58053
##   KARBO_KAP
## 1  430.4330
## 2  335.7839
## 3  280.1846

5. Kesimpulan

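K-Means clustering berhasil mengelompokkan rumah tangga menjadi tiga kelompok berdasarkan pengeluaran dan konsumsi mereka: Cluster 1 (101.550 rumah tangga) dengan rata-rata pengeluaran menengah dan rata-rata KALORI_KAP tertinggi, Cluster 2 (16.406 rumah tangga) dengan rata-rata pengeluaran tertinggi, dan Cluster 3 (222.076 rumah tangga) dengan rata-rata pengeluaran dan konsumsi terendah. Analisis ini memberikan wawasan awal tentang pola pengeluaran dan konsumsi masyarakat dalam data SUSENAS 2019.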