Analisis ini bertujuan untuk melakukan preprocessing, analisis deskriptif, dan analisis multivariat pada data SUSENAS 2019. Dataset terdiri dari informasi pengeluaran rumah tangga, konsumsi, dan variabel terkait lainnya.
library(foreign)
library(dplyr)
# Path to the blok43.dbf input file. Kept in one place so the script can be
# pointed at another location without touching the read call.
dbf_path <- "D:/Nomor 6/blok43.dbf"
# Stop early with an explicit message when the file is not where we expect it.
if (!file.exists(dbf_path)) {
  stop("DBF file not found: ", dbf_path, call. = FALSE)
}
# Read the dataset (file blok43.dbf)
susenas_raw <- read.dbf(dbf_path)
# Show the initial structure of the dataset
str(susenas_raw)
## 'data.frame': 340032 obs. of 18 variables:
## $ RENUM : int 285340 285346 285337 285334 285331 285319 285322 285325 285343 285328 ...
## $ R101 : int 11 11 11 11 11 11 11 11 11 11 ...
## $ R102 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ R105 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ R203 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ R301 : int 4 4 3 2 2 1 5 4 4 3 ...
## $ FOOD : num 1795114 2108331 1810200 1561971 1178940 ...
## $ NONFOOD : num 1183000 868198 1074350 790975 778892 ...
## $ EXPEND : num 2978114 2976530 2884550 2352946 1957832 ...
## $ KAPITA : num 744529 744132 961517 1176473 978916 ...
## $ KALORI_KAP: num 2436 2451 2496 3385 3555 ...
## $ PROTE_KAP : num 63.9 78.4 74.5 109.2 105.3 ...
## $ LEMAK_KAP : num 49.3 48.2 45.3 82.6 59 ...
## $ KARBO_KAP : num 397 404 419 506 612 ...
## $ WERT : num 35.2 36.6 35.5 35 31 ...
## $ WEIND : num 140.8 146.6 106.6 70 61.9 ...
## $ WI1 : int 9976 9976 9976 9976 9976 9976 9976 9976 9976 9976 ...
## $ WI2 : int 177146 60810 99379 141157 123223 154278 90478 206467 24522 279725 ...
## - attr(*, "data_types")= chr [1:18] "N" "N" "N" "N" ...
# Columns kept for the analysis, grouped by theme:
# identifiers/household fields, expenditure, per-capita nutrition, weights.
selected_vars <- c(
  "RENUM", "R101", "R102", "R105", "R203", "R301",
  "FOOD", "NONFOOD", "EXPEND", "KAPITA",
  "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP",
  "WERT", "WEIND", "WI1", "WI2"
)
# Keep only those columns, then drop every row that contains an NA.
susenas_cleaned <- susenas_raw %>%
  select(all_of(selected_vars)) %>%
  na.omit()
# Print a summary of the data after cleaning
summary(susenas_cleaned)
## RENUM R101 R102 R105 R203
## Min. : 1 Min. :11.00 Min. : 1.00 Min. :1.000 Min. :1
## 1st Qu.: 85009 1st Qu.:18.00 1st Qu.: 4.00 1st Qu.:1.000 1st Qu.:1
## Median :170017 Median :35.00 Median :10.00 Median :2.000 Median :1
## Mean :170017 Mean :43.05 Mean :21.68 Mean :1.579 Mean :1
## 3rd Qu.:255024 3rd Qu.:64.00 3rd Qu.:23.00 3rd Qu.:2.000 3rd Qu.:1
## Max. :340032 Max. :94.00 Max. :79.00 Max. :2.000 Max. :1
## R301 FOOD NONFOOD EXPEND
## Min. : 1.000 Min. : 114857 Min. : 38208 Min. : 182190
## 1st Qu.: 3.000 1st Qu.: 1295486 1st Qu.: 857667 1st Qu.: 2277443
## Median : 4.000 Median : 1916079 Median : 1403417 Median : 3429452
## Mean : 3.757 Mean : 2226646 Mean : 2142186 Mean : 4368832
## 3rd Qu.: 5.000 3rd Qu.: 2785714 3rd Qu.: 2393183 3rd Qu.: 5212515
## Max. :29.000 Max. :31272857 Max. :193333898 Max. :201254112
## KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## Min. : 114515 Min. :1000 Min. : 4.166 Min. : 2.023
## 1st Qu.: 656004 1st Qu.:1737 1st Qu.: 47.371 1st Qu.: 38.230
## Median : 997299 Median :2116 Median : 59.678 Median : 51.136
## Mean : 1308460 Mean :2217 Mean : 64.088 Mean : 55.374
## 3rd Qu.: 1543848 3rd Qu.:2580 3rd Qu.: 75.468 3rd Qu.: 67.453
## Max. :94740858 Max. :4500 Max. :364.666 Max. :293.561
## KARBO_KAP WERT WEIND WI1
## Min. : 25.66 Min. : 1.165 Min. : 1.165 Min. : 1
## 1st Qu.: 254.84 1st Qu.: 67.080 1st Qu.: 212.398 1st Qu.: 7180
## Median : 312.18 Median : 141.845 Median : 474.874 Median :15780
## Mean : 327.74 Mean : 222.376 Mean : 798.704 Mean :15840
## 3rd Qu.: 382.61 3rd Qu.: 296.702 3rd Qu.: 1011.605 3rd Qu.:24378
## Max. :1042.51 Max. :2082.520 Max. :22907.723 Max. :32974
## WI2
## Min. : 1
## 1st Qu.: 71016
## Median :156026
## Mean :156601
## 3rd Qu.:241034
## Max. :326043
# Descriptive statistics restricted to the numeric columns
susenas_cleaned %>%
  select(where(is.numeric)) %>%
  summary()
## RENUM R101 R102 R105 R203
## Min. : 1 Min. :11.00 Min. : 1.00 Min. :1.000 Min. :1
## 1st Qu.: 85009 1st Qu.:18.00 1st Qu.: 4.00 1st Qu.:1.000 1st Qu.:1
## Median :170017 Median :35.00 Median :10.00 Median :2.000 Median :1
## Mean :170017 Mean :43.05 Mean :21.68 Mean :1.579 Mean :1
## 3rd Qu.:255024 3rd Qu.:64.00 3rd Qu.:23.00 3rd Qu.:2.000 3rd Qu.:1
## Max. :340032 Max. :94.00 Max. :79.00 Max. :2.000 Max. :1
## R301 FOOD NONFOOD EXPEND
## Min. : 1.000 Min. : 114857 Min. : 38208 Min. : 182190
## 1st Qu.: 3.000 1st Qu.: 1295486 1st Qu.: 857667 1st Qu.: 2277443
## Median : 4.000 Median : 1916079 Median : 1403417 Median : 3429452
## Mean : 3.757 Mean : 2226646 Mean : 2142186 Mean : 4368832
## 3rd Qu.: 5.000 3rd Qu.: 2785714 3rd Qu.: 2393183 3rd Qu.: 5212515
## Max. :29.000 Max. :31272857 Max. :193333898 Max. :201254112
## KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## Min. : 114515 Min. :1000 Min. : 4.166 Min. : 2.023
## 1st Qu.: 656004 1st Qu.:1737 1st Qu.: 47.371 1st Qu.: 38.230
## Median : 997299 Median :2116 Median : 59.678 Median : 51.136
## Mean : 1308460 Mean :2217 Mean : 64.088 Mean : 55.374
## 3rd Qu.: 1543848 3rd Qu.:2580 3rd Qu.: 75.468 3rd Qu.: 67.453
## Max. :94740858 Max. :4500 Max. :364.666 Max. :293.561
## KARBO_KAP WERT WEIND WI1
## Min. : 25.66 Min. : 1.165 Min. : 1.165 Min. : 1
## 1st Qu.: 254.84 1st Qu.: 67.080 1st Qu.: 212.398 1st Qu.: 7180
## Median : 312.18 Median : 141.845 Median : 474.874 Median :15780
## Mean : 327.74 Mean : 222.376 Mean : 798.704 Mean :15840
## 3rd Qu.: 382.61 3rd Qu.: 296.702 3rd Qu.: 1011.605 3rd Qu.:24378
## Max. :1042.51 Max. :2082.520 Max. :22907.723 Max. :32974
## WI2
## Min. : 1
## 1st Qu.: 71016
## Median :156026
## Mean :156601
## 3rd Qu.:241034
## Max. :326043
# Frequency table of a categorical variable (example: R203)
table(susenas_cleaned[["R203"]])
##
## 1
## 340032
library(ggplot2)
# Histogram of food expenditure (FOOD), using bins of width 500000
ggplot(data = susenas_cleaned, mapping = aes(x = FOOD)) +
  geom_histogram(binwidth = 500000, fill = "blue", alpha = 0.7) +
  theme_minimal() +
  ggtitle("Distribusi Pengeluaran Makanan") +
  xlab("Pengeluaran Makanan") +
  ylab("Frekuensi")
# Numeric variables used as clustering features
var_cluster <- c(
  "FOOD", "NONFOOD", "EXPEND", "KAPITA",
  "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP"
)
# NOTE(review): in the rows printed by str(), EXPEND looks like FOOD + NONFOOD,
# so total spending may be represented twice among the features -- confirm
# that this is intended.
cluster_data <- susenas_cleaned[, var_cluster]
# Standardise the features with scale() so they are on a comparable scale
cluster_data_normalized <- scale(cluster_data)
# Fix the RNG seed so the random starts used by kmeans() are reproducible
set.seed(123)
# Candidate numbers of clusters evaluated for the elbow plot
k_values <- 1:10
# Total within-cluster sum of squares for each k. vapply() enforces exactly
# one numeric value per k, whereas sapply() decides its return type from the
# input.
wss <- vapply(
  k_values,
  function(k) {
    kmeans(cluster_data_normalized, centers = k, nstart = 25)$tot.withinss
  },
  numeric(1)
)
# Elbow plot: look for the k after which the curve flattens
plot(k_values, wss,
  type = "b", pch = 19, frame = FALSE,
  xlab = "Number of Clusters (k)",
  ylab = "Total Within-Cluster Sum of Squares",
  main = "Elbow Method for Optimal k"
)
library(factoextra)
# Fit the final k-means model with 3 clusters and 25 random starts.
# NOTE(review): no seed is set in this step, so the fit depends on the RNG
# state left by whatever ran before it.
kmeans_result <- kmeans(
  x = cluster_data_normalized,
  centers = 3,
  nstart = 25
)
# Attach each row's cluster label to the cleaned data as a factor
susenas_cleaned[["Cluster"]] <- as.factor(kmeans_result[["cluster"]])
# Plot the clusters as points enclosed by convex hulls
fviz_cluster(
  kmeans_result,
  data = cluster_data_normalized,
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  ggtheme = theme_minimal()
) +
  ggtitle("K-Means Clustering")
# Number of rows assigned to each cluster
table(susenas_cleaned[["Cluster"]])
##
## 1 2 3
## 101550 16406 222076
# Mean of every clustering feature within each cluster
aggregate(
  x = cluster_data,
  by = list(Cluster = susenas_cleaned[["Cluster"]]),
  FUN = mean
)
## Cluster FOOD NONFOOD EXPEND KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## 1 1 2464382 1974974 4439356 1693360.9 2976.117 88.59248 78.14801
## 2 2 5517235 10824287 16341522 4352082.1 2523.204 82.54083 74.04720
## 3 3 1874841 1577253 3452094 907604.3 1847.794 51.51926 43.58053
## KARBO_KAP
## 1 430.4330
## 2 335.7839
## 3 280.1846
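K-Means clustering berhasil mengelompokkan rumah tangga menjadi tiga kelompok berdasarkan pengeluaran dan konsumsi mereka. Klaster 3 (222.076 rumah tangga) memiliki rata-rata pengeluaran dan asupan kalori per kapita terendah; klaster 1 (101.550 rumah tangga) memiliki pengeluaran menengah dengan asupan kalori, protein, lemak, dan karbohidrat per kapita tertinggi; sedangkan klaster 2 (16.406 rumah tangga) memiliki rata-rata pengeluaran tertinggi, terutama untuk nonmakanan. Analisis ini memberikan wawasan awal tentang pola pengeluaran dan konsumsi masyarakat dalam data SUSENAS 2019.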