Memuat Library dan Data

library(foreign)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(FactoMineR)

# Read the raw survey table from the dBase file in the working directory.
data_susenas <- foreign::read.dbf(file = "blok43.dbf")

# Read the accompanying Excel workbook (the name suggests a data dictionary).
kamus_data <- readxl::read_excel(path = "KamusDataSusenas.xlsx")

Prapemrosesan Data

Melihat struktur data

# Print a compact overview of every column: its type and first few values.
str(object = data_susenas)
## 'data.frame':    340032 obs. of  18 variables:
##  $ RENUM     : int  285340 285346 285337 285334 285331 285319 285322 285325 285343 285328 ...
##  $ R101      : int  11 11 11 11 11 11 11 11 11 11 ...
##  $ R102      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ R105      : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ R203      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ R301      : int  4 4 3 2 2 1 5 4 4 3 ...
##  $ FOOD      : num  1795114 2108331 1810200 1561971 1178940 ...
##  $ NONFOOD   : num  1183000 868198 1074350 790975 778892 ...
##  $ EXPEND    : num  2978114 2976530 2884550 2352946 1957832 ...
##  $ KAPITA    : num  744529 744132 961517 1176473 978916 ...
##  $ KALORI_KAP: num  2436 2451 2496 3385 3555 ...
##  $ PROTE_KAP : num  63.9 78.4 74.5 109.2 105.3 ...
##  $ LEMAK_KAP : num  49.3 48.2 45.3 82.6 59 ...
##  $ KARBO_KAP : num  397 404 419 506 612 ...
##  $ WERT      : num  35.2 36.6 35.5 35 31 ...
##  $ WEIND     : num  140.8 146.6 106.6 70 61.9 ...
##  $ WI1       : int  9976 9976 9976 9976 9976 9976 9976 9976 9976 9976 ...
##  $ WI2       : int  177146 60810 99379 141157 123223 154278 90478 206467 24522 279725 ...
##  - attr(*, "data_types")= chr [1:18] "N" "N" "N" "N" ...

Melihat beberapa baris data

# Preview the first six rows (head()'s default row count, spelled out).
head(data_susenas, n = 6L)
##    RENUM R101 R102 R105 R203 R301      FOOD   NONFOOD    EXPEND    KAPITA
## 1 285340   11    1    2    1    4 1795114.3 1183000.0 2978114.3  744528.6
## 2 285346   11    1    2    1    4 2108331.4  868198.3 2976529.8  744132.4
## 3 285337   11    1    2    1    3 1810200.0 1074350.0 2884550.0  961516.7
## 4 285334   11    1    2    1    2 1561971.4  790975.0 2352946.4 1176473.2
## 5 285331   11    1    2    1    2 1178940.0  778891.7 1957831.7  978915.8
## 6 285319   11    1    2    1    1  411428.6  347100.0  758528.6  758528.6
##   KALORI_KAP PROTE_KAP LEMAK_KAP KARBO_KAP     WERT     WEIND  WI1    WI2
## 1   2435.711  63.90107  49.25109  396.8879 35.18946 140.75786 9976 177146
## 2   2451.215  78.39737  48.24964  404.1182 36.64960 146.59842 9976  60810
## 3   2495.909  74.53511  45.31679  419.1078 35.52082 106.56246 9976  99379
## 4   3384.523 109.18344  82.58239  506.0941 35.02336  70.04672 9976 141157
## 5   3554.871 105.33573  58.97906  611.9319 30.97004  61.94007 9976 123223
## 6   2751.892  77.70753  51.27606  451.6912 37.98458  37.98458 9976 154278

Memilih hanya variabel yang kita butuhkan, memeriksa missing values melalui ringkasan data, lalu membuang baris yang tidak lengkap

# Keep the analysis columns, in this order. Names are passed as strings
# through all_of(), so a missing column raises an error.
data_susenas_selected <- select(
  data_susenas,
  all_of(c(
    "RENUM", "R101", "R102", "R105", "R203", "R301",
    "FOOD", "NONFOOD", "EXPEND", "KAPITA",
    "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP",
    "WERT", "WEIND", "WI1", "WI2"
  ))
)

# Per-column summary; summary() adds an "NA's" row for any column that
# contains missing values, which is how missingness is inspected here.
summary(object = data_susenas_selected)
##      RENUM             R101            R102            R105            R203  
##  Min.   :     1   Min.   :11.00   Min.   : 1.00   Min.   :1.000   Min.   :1  
##  1st Qu.: 85009   1st Qu.:18.00   1st Qu.: 4.00   1st Qu.:1.000   1st Qu.:1  
##  Median :170017   Median :35.00   Median :10.00   Median :2.000   Median :1  
##  Mean   :170017   Mean   :43.05   Mean   :21.68   Mean   :1.579   Mean   :1  
##  3rd Qu.:255024   3rd Qu.:64.00   3rd Qu.:23.00   3rd Qu.:2.000   3rd Qu.:1  
##  Max.   :340032   Max.   :94.00   Max.   :79.00   Max.   :2.000   Max.   :1  
##       R301             FOOD             NONFOOD              EXPEND         
##  Min.   : 1.000   Min.   :  114857   Min.   :    38208   Min.   :   182190  
##  1st Qu.: 3.000   1st Qu.: 1295486   1st Qu.:   857667   1st Qu.:  2277443  
##  Median : 4.000   Median : 1916079   Median :  1403417   Median :  3429452  
##  Mean   : 3.757   Mean   : 2226646   Mean   :  2142186   Mean   :  4368832  
##  3rd Qu.: 5.000   3rd Qu.: 2785714   3rd Qu.:  2393183   3rd Qu.:  5212515  
##  Max.   :29.000   Max.   :31272857   Max.   :193333898   Max.   :201254112  
##      KAPITA           KALORI_KAP     PROTE_KAP         LEMAK_KAP      
##  Min.   :  114515   Min.   :1000   Min.   :  4.166   Min.   :  2.023  
##  1st Qu.:  656004   1st Qu.:1737   1st Qu.: 47.371   1st Qu.: 38.230  
##  Median :  997299   Median :2116   Median : 59.678   Median : 51.136  
##  Mean   : 1308460   Mean   :2217   Mean   : 64.088   Mean   : 55.374  
##  3rd Qu.: 1543848   3rd Qu.:2580   3rd Qu.: 75.468   3rd Qu.: 67.453  
##  Max.   :94740858   Max.   :4500   Max.   :364.666   Max.   :293.561  
##    KARBO_KAP            WERT              WEIND                WI1       
##  Min.   :  25.66   Min.   :   1.165   Min.   :    1.165   Min.   :    1  
##  1st Qu.: 254.84   1st Qu.:  67.080   1st Qu.:  212.398   1st Qu.: 7180  
##  Median : 312.18   Median : 141.845   Median :  474.874   Median :15780  
##  Mean   : 327.74   Mean   : 222.376   Mean   :  798.704   Mean   :15840  
##  3rd Qu.: 382.61   3rd Qu.: 296.702   3rd Qu.: 1011.605   3rd Qu.:24378  
##  Max.   :1042.51   Max.   :2082.520   Max.   :22907.723   Max.   :32974  
##       WI2        
##  Min.   :     1  
##  1st Qu.: 71016  
##  Median :156026  
##  Mean   :156601  
##  3rd Qu.:241034  
##  Max.   :326043
# Drop every row that has a missing value in any of the selected columns.
data_susenas_clean <- na.omit(data_susenas_selected)

Analisis Deskriptif

Statistik deskriptif untuk variabel numerik

# Descriptive statistics (min, quartiles, mean, max) for each column of the
# cleaned data.
summary(object = data_susenas_clean)
##      RENUM             R101            R102            R105            R203  
##  Min.   :     1   Min.   :11.00   Min.   : 1.00   Min.   :1.000   Min.   :1  
##  1st Qu.: 85009   1st Qu.:18.00   1st Qu.: 4.00   1st Qu.:1.000   1st Qu.:1  
##  Median :170017   Median :35.00   Median :10.00   Median :2.000   Median :1  
##  Mean   :170017   Mean   :43.05   Mean   :21.68   Mean   :1.579   Mean   :1  
##  3rd Qu.:255024   3rd Qu.:64.00   3rd Qu.:23.00   3rd Qu.:2.000   3rd Qu.:1  
##  Max.   :340032   Max.   :94.00   Max.   :79.00   Max.   :2.000   Max.   :1  
##       R301             FOOD             NONFOOD              EXPEND         
##  Min.   : 1.000   Min.   :  114857   Min.   :    38208   Min.   :   182190  
##  1st Qu.: 3.000   1st Qu.: 1295486   1st Qu.:   857667   1st Qu.:  2277443  
##  Median : 4.000   Median : 1916079   Median :  1403417   Median :  3429452  
##  Mean   : 3.757   Mean   : 2226646   Mean   :  2142186   Mean   :  4368832  
##  3rd Qu.: 5.000   3rd Qu.: 2785714   3rd Qu.:  2393183   3rd Qu.:  5212515  
##  Max.   :29.000   Max.   :31272857   Max.   :193333898   Max.   :201254112  
##      KAPITA           KALORI_KAP     PROTE_KAP         LEMAK_KAP      
##  Min.   :  114515   Min.   :1000   Min.   :  4.166   Min.   :  2.023  
##  1st Qu.:  656004   1st Qu.:1737   1st Qu.: 47.371   1st Qu.: 38.230  
##  Median :  997299   Median :2116   Median : 59.678   Median : 51.136  
##  Mean   : 1308460   Mean   :2217   Mean   : 64.088   Mean   : 55.374  
##  3rd Qu.: 1543848   3rd Qu.:2580   3rd Qu.: 75.468   3rd Qu.: 67.453  
##  Max.   :94740858   Max.   :4500   Max.   :364.666   Max.   :293.561  
##    KARBO_KAP            WERT              WEIND                WI1       
##  Min.   :  25.66   Min.   :   1.165   Min.   :    1.165   Min.   :    1  
##  1st Qu.: 254.84   1st Qu.:  67.080   1st Qu.:  212.398   1st Qu.: 7180  
##  Median : 312.18   Median : 141.845   Median :  474.874   Median :15780  
##  Mean   : 327.74   Mean   : 222.376   Mean   :  798.704   Mean   :15840  
##  3rd Qu.: 382.61   3rd Qu.: 296.702   3rd Qu.: 1011.605   3rd Qu.:24378  
##  Max.   :1042.51   Max.   :2082.520   Max.   :22907.723   Max.   :32974  
##       WI2        
##  Min.   :     1  
##  1st Qu.: 71016  
##  Median :156026  
##  Mean   :156601  
##  3rd Qu.:241034  
##  Max.   :326043

Visualisasi distribusi pengeluaran untuk makanan (FOOD)

# Histogram of FOOD using fixed-width bins of 10,000 units.
ggplot(data = data_susenas_clean, mapping = aes(x = FOOD)) +
  geom_histogram(binwidth = 10000) +
  labs(
    title = "Distribusi Pengeluaran Makanan",
    x = "Pengeluaran Makanan",
    y = "Frekuensi"
  )

Tabel frekuensi untuk variabel kategorik R102 (kode bernilai 1–36 dan 71–79, sehingga bukan jenis kelamin kepala rumah tangga; lihat kamus data untuk definisinya)

# Count how many rows fall under each distinct R102 code.
table(data_susenas_clean[["R102"]])
## 
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
## 21499 22187 22173 21635 18431 16784 15837 15991 15019 12236 11526 10533  7991 
##    14    15    16    17    18    19    20    21    22    23    24    25    26 
##  6169  6547  5597  5588  5204  3073  3350  2882  3096  2352  2331  2897  2714 
##    27    28    29    30    31    32    33    34    35    36    71    72    73 
##  2152  1978  2445   538   525   426   530   503   520   530 23050 12719  8073 
##    74    75    76    77    78    79 
##  7580  5453  3287  2464  2471  1146

Analisis Multivariat

Melakukan PCA pada variabel terkait konsumsi dan pengeluaran per kapita, lalu memplot hasil PCA

# Principal component analysis on the expenditure and per-capita nutrient
# columns. scale.unit = TRUE standardises each variable to unit variance, so
# columns measured on very different scales contribute comparably.
#
# graph = FALSE: by default PCA() draws the individuals and variables maps
# itself. This script draws both explicitly with plot() afterwards, so leaving
# the default on renders every observation twice and duplicates the figures.
pca_result <- PCA(
  data_susenas_clean[, c(
    "FOOD", "NONFOOD", "EXPEND", "KAPITA",
    "KALORI_KAP", "PROTE_KAP", "LEMAK_KAP", "KARBO_KAP"
  )],
  scale.unit = TRUE,
  graph = FALSE
)

# Individuals map: observations projected onto the principal components.
plot(x = pca_result, choix = "ind")

# Variables map: how each input variable relates to the components.
plot(x = pca_result, choix = "var")

Poin Kesimpulan