library(foreign)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(FactoMineR)
# Read the survey table from the dBase file and the accompanying Excel
# workbook (presumably the data dictionary, "kamus data" -- confirm).
data_susenas <- foreign::read.dbf(file = "blok43.dbf")
kamus_data <- readxl::read_excel(path = "KamusDataSusenas.xlsx")
Melihat struktur data
# Compact overview of the loaded table: dimensions, column names and types.
str(object = data_susenas)
## 'data.frame': 340032 obs. of 18 variables:
## $ RENUM : int 285340 285346 285337 285334 285331 285319 285322 285325 285343 285328 ...
## $ R101 : int 11 11 11 11 11 11 11 11 11 11 ...
## $ R102 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ R105 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ R203 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ R301 : int 4 4 3 2 2 1 5 4 4 3 ...
## $ FOOD : num 1795114 2108331 1810200 1561971 1178940 ...
## $ NONFOOD : num 1183000 868198 1074350 790975 778892 ...
## $ EXPEND : num 2978114 2976530 2884550 2352946 1957832 ...
## $ KAPITA : num 744529 744132 961517 1176473 978916 ...
## $ KALORI_KAP: num 2436 2451 2496 3385 3555 ...
## $ PROTE_KAP : num 63.9 78.4 74.5 109.2 105.3 ...
## $ LEMAK_KAP : num 49.3 48.2 45.3 82.6 59 ...
## $ KARBO_KAP : num 397 404 419 506 612 ...
## $ WERT : num 35.2 36.6 35.5 35 31 ...
## $ WEIND : num 140.8 146.6 106.6 70 61.9 ...
## $ WI1 : int 9976 9976 9976 9976 9976 9976 9976 9976 9976 9976 ...
## $ WI2 : int 177146 60810 99379 141157 123223 154278 90478 206467 24522 279725 ...
## - attr(*, "data_types")= chr [1:18] "N" "N" "N" "N" ...
Melihat beberapa baris data
# Show the first six rows (the default of head(), spelled out here).
head(data_susenas, n = 6L)
## RENUM R101 R102 R105 R203 R301 FOOD NONFOOD EXPEND KAPITA
## 1 285340 11 1 2 1 4 1795114.3 1183000.0 2978114.3 744528.6
## 2 285346 11 1 2 1 4 2108331.4 868198.3 2976529.8 744132.4
## 3 285337 11 1 2 1 3 1810200.0 1074350.0 2884550.0 961516.7
## 4 285334 11 1 2 1 2 1561971.4 790975.0 2352946.4 1176473.2
## 5 285331 11 1 2 1 2 1178940.0 778891.7 1957831.7 978915.8
## 6 285319 11 1 2 1 1 411428.6 347100.0 758528.6 758528.6
## KALORI_KAP PROTE_KAP LEMAK_KAP KARBO_KAP WERT WEIND WI1 WI2
## 1 2435.711 63.90107 49.25109 396.8879 35.18946 140.75786 9976 177146
## 2 2451.215 78.39737 48.24964 404.1182 36.64960 146.59842 9976 60810
## 3 2495.909 74.53511 45.31679 419.1078 35.52082 106.56246 9976 99379
## 4 3384.523 109.18344 82.58239 506.0941 35.02336 70.04672 9976 141157
## 5 3554.871 105.33573 58.97906 611.9319 30.97004 61.94007 9976 123223
## 6 2751.892 77.70753 51.27606 451.6912 37.98458 37.98458 9976 154278
Memilih hanya variabel yang kita butuhkan dan cek missing values
# Keep the analysis columns, listed explicitly so the selection does not
# depend on the column order of the source table.
data_susenas_selected <- select(
  data_susenas,
  RENUM, R101, R102, R105, R203, R301,
  FOOD, NONFOOD, EXPEND, KAPITA,
  KALORI_KAP, PROTE_KAP, LEMAK_KAP, KARBO_KAP,
  WERT, WEIND, WI1, WI2
)

# Per-column summary; summary() adds an "NA's" entry for any column that
# contains missing values, so this doubles as a missing-value check.
summary(object = data_susenas_selected)
## RENUM R101 R102 R105 R203
## Min. : 1 Min. :11.00 Min. : 1.00 Min. :1.000 Min. :1
## 1st Qu.: 85009 1st Qu.:18.00 1st Qu.: 4.00 1st Qu.:1.000 1st Qu.:1
## Median :170017 Median :35.00 Median :10.00 Median :2.000 Median :1
## Mean :170017 Mean :43.05 Mean :21.68 Mean :1.579 Mean :1
## 3rd Qu.:255024 3rd Qu.:64.00 3rd Qu.:23.00 3rd Qu.:2.000 3rd Qu.:1
## Max. :340032 Max. :94.00 Max. :79.00 Max. :2.000 Max. :1
## R301 FOOD NONFOOD EXPEND
## Min. : 1.000 Min. : 114857 Min. : 38208 Min. : 182190
## 1st Qu.: 3.000 1st Qu.: 1295486 1st Qu.: 857667 1st Qu.: 2277443
## Median : 4.000 Median : 1916079 Median : 1403417 Median : 3429452
## Mean : 3.757 Mean : 2226646 Mean : 2142186 Mean : 4368832
## 3rd Qu.: 5.000 3rd Qu.: 2785714 3rd Qu.: 2393183 3rd Qu.: 5212515
## Max. :29.000 Max. :31272857 Max. :193333898 Max. :201254112
## KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## Min. : 114515 Min. :1000 Min. : 4.166 Min. : 2.023
## 1st Qu.: 656004 1st Qu.:1737 1st Qu.: 47.371 1st Qu.: 38.230
## Median : 997299 Median :2116 Median : 59.678 Median : 51.136
## Mean : 1308460 Mean :2217 Mean : 64.088 Mean : 55.374
## 3rd Qu.: 1543848 3rd Qu.:2580 3rd Qu.: 75.468 3rd Qu.: 67.453
## Max. :94740858 Max. :4500 Max. :364.666 Max. :293.561
## KARBO_KAP WERT WEIND WI1
## Min. : 25.66 Min. : 1.165 Min. : 1.165 Min. : 1
## 1st Qu.: 254.84 1st Qu.: 67.080 1st Qu.: 212.398 1st Qu.: 7180
## Median : 312.18 Median : 141.845 Median : 474.874 Median :15780
## Mean : 327.74 Mean : 222.376 Mean : 798.704 Mean :15840
## 3rd Qu.: 382.61 3rd Qu.: 296.702 3rd Qu.: 1011.605 3rd Qu.:24378
## Max. :1042.51 Max. :2082.520 Max. :22907.723 Max. :32974
## WI2
## Min. : 1
## 1st Qu.: 71016
## Median :156026
## Mean :156601
## 3rd Qu.:241034
## Max. :326043
# Drop every row that has an NA in any of the selected columns.
data_susenas_clean <- na.omit(data_susenas_selected)
Statistik deskriptif untuk variabel numerik
# Descriptive statistics (min, quartiles, mean, max) for each column of the
# cleaned table.
summary(object = data_susenas_clean)
## RENUM R101 R102 R105 R203
## Min. : 1 Min. :11.00 Min. : 1.00 Min. :1.000 Min. :1
## 1st Qu.: 85009 1st Qu.:18.00 1st Qu.: 4.00 1st Qu.:1.000 1st Qu.:1
## Median :170017 Median :35.00 Median :10.00 Median :2.000 Median :1
## Mean :170017 Mean :43.05 Mean :21.68 Mean :1.579 Mean :1
## 3rd Qu.:255024 3rd Qu.:64.00 3rd Qu.:23.00 3rd Qu.:2.000 3rd Qu.:1
## Max. :340032 Max. :94.00 Max. :79.00 Max. :2.000 Max. :1
## R301 FOOD NONFOOD EXPEND
## Min. : 1.000 Min. : 114857 Min. : 38208 Min. : 182190
## 1st Qu.: 3.000 1st Qu.: 1295486 1st Qu.: 857667 1st Qu.: 2277443
## Median : 4.000 Median : 1916079 Median : 1403417 Median : 3429452
## Mean : 3.757 Mean : 2226646 Mean : 2142186 Mean : 4368832
## 3rd Qu.: 5.000 3rd Qu.: 2785714 3rd Qu.: 2393183 3rd Qu.: 5212515
## Max. :29.000 Max. :31272857 Max. :193333898 Max. :201254112
## KAPITA KALORI_KAP PROTE_KAP LEMAK_KAP
## Min. : 114515 Min. :1000 Min. : 4.166 Min. : 2.023
## 1st Qu.: 656004 1st Qu.:1737 1st Qu.: 47.371 1st Qu.: 38.230
## Median : 997299 Median :2116 Median : 59.678 Median : 51.136
## Mean : 1308460 Mean :2217 Mean : 64.088 Mean : 55.374
## 3rd Qu.: 1543848 3rd Qu.:2580 3rd Qu.: 75.468 3rd Qu.: 67.453
## Max. :94740858 Max. :4500 Max. :364.666 Max. :293.561
## KARBO_KAP WERT WEIND WI1
## Min. : 25.66 Min. : 1.165 Min. : 1.165 Min. : 1
## 1st Qu.: 254.84 1st Qu.: 67.080 1st Qu.: 212.398 1st Qu.: 7180
## Median : 312.18 Median : 141.845 Median : 474.874 Median :15780
## Mean : 327.74 Mean : 222.376 Mean : 798.704 Mean :15840
## 3rd Qu.: 382.61 3rd Qu.: 296.702 3rd Qu.: 1011.605 3rd Qu.:24378
## Max. :1042.51 Max. :2082.520 Max. :22907.723 Max. :32974
## WI2
## Min. : 1
## 1st Qu.: 71016
## Median :156026
## Mean :156601
## 3rd Qu.:241034
## Max. :326043
Visualisasi distribusi pengeluaran untuk makanan (FOOD)
# Histogram of FOOD using fixed-width bins of 10000 (same units as FOOD).
ggplot(data = data_susenas_clean) +
  geom_histogram(mapping = aes(x = FOOD), binwidth = 10000) +
  labs(
    title = "Distribusi Pengeluaran Makanan",
    x = "Pengeluaran Makanan",
    y = "Frekuensi"
  )
Tabel frekuensi untuk variabel kategorik R102 (tampaknya kode kabupaten/kota, bukan jenis kelamin kepala rumah tangga: nilainya berupa 45 kode antara 1 dan 79)
# Count how many rows carry each distinct R102 code.
table(data_susenas_clean[["R102"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 21499 22187 22173 21635 18431 16784 15837 15991 15019 12236 11526 10533 7991
## 14 15 16 17 18 19 20 21 22 23 24 25 26
## 6169 6547 5597 5588 5204 3073 3350 2882 3096 2352 2331 2897 2714
## 27 28 29 30 31 32 33 34 35 36 71 72 73
## 2152 1978 2445 538 525 426 530 503 520 530 23050 12719 8073
## 74 75 76 77 78 79
## 7580 5453 3287 2464 2471 1146
Melakukan PCA pada variabel terkait konsumsi dan pengeluaran per kapita, lalu memplot hasil PCA
# Principal component analysis on the expenditure and per-capita nutrient
# columns. scale.unit = TRUE standardises each column to unit variance so the
# large-valued expenditure columns do not dominate the nutrient columns.
#
# graph = FALSE: by default PCA() draws the individuals and variables maps
# itself. Both maps are drawn explicitly below, so the default would render
# each of them twice (costly with this many rows).
#
# NOTE(review): in the rows printed by head(), EXPEND equals FOOD + NONFOOD,
# so these three columns look linearly dependent -- confirm that feeding all
# three into the PCA is intended.
pca_result <- PCA(
  data_susenas_clean[, c(
    "FOOD", "NONFOOD", "EXPEND", "KAPITA",
    "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP"
  )],
  scale.unit = TRUE,
  graph = FALSE
)

# Individuals map (one point per row) followed by the variables map
# (correlation circle).
plot(pca_result, choix = "ind")
plot(pca_result, choix = "var")
PCA :
Dim 1 terkait pengeluaran.
Dim 2 terkait pola nutrisi.
Distribusi Pengeluaran :
Konsentrasi pada pengeluaran rendah.
Outlier terdapat pada rumah tangga dengan pengeluaran sangat tinggi.
Distribusi Pengeluaran Makanan :
Statistik Deskriptif :
Terdapat kesenjangan antara pengeluaran makanan dan non-makanan.
Sebagian besar populasi memiliki konsumsi nutrisi yang baik.
Korelasi :
Konsumsi makanan dan nutrisi memiliki hubungan yang kuat.
Variabel EXPEND dan NONFOOD menunjukkan pola berbeda dengan variabel lain.
Implikasi :
Pola konsumsi rumah tangga sangat dipengaruhi oleh pengeluaran makanan.
Analisis lanjutan dapat membantu merancang kebijakan yang mendukung kebutuhan rumah tangga berpengeluaran rendah.