Summary of Household Expenses in Bogor (Susenas KP, KOR

Tugas STA1232-Analisis Eksplorasi Data, IPB University

1 Input data

1.1 Susenas KP 4.3

library(readr)
df43 <- read_csv("C:/Users/Muhammad Hafiz F/Downloads/2023 Maret JABAR - SUSENAS KP BP 4.3.csv")

## New names:
## Rows: 25890 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (20): ...1, URUT, R101, R102, R105, R301, FOOD, NONFOOD, EXPEND, KAPITA,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

head(df43)

## # A tibble: 6 × 20
##    ...1   URUT  R101  R102  R105  R301     FOOD NONFOOD EXPEND KAPITA KALORI_KAP
##   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>   <dbl>  <dbl>  <dbl>      <dbl>
## 1     0 500001    32     7     2     4 2660400   2.30e6 4.96e6 1.24e6      2365.
## 2     1 500002    32    72     1     2 1108714.  5.25e5 1.63e6 8.17e5      2612.
## 3     2 500003    32     6     2     3 2413886.  1.40e6 3.81e6 1.27e6      2527.
## 4     3 500004    32    72     1     7 7770000   4.31e6 1.21e7 1.73e6      3656.
## 5     4 500005    32    77     1     3 4932557.  4.62e7 5.12e7 1.71e7      2331.
## 6     5 500006    32    77     1     2 3272143.  1.14e7 1.47e7 7.34e6      2331.
## # ℹ 9 more variables: PROTE_KAP <dbl>, LEMAK_KAP <dbl>, KARBO_KAP <dbl>,
## #   WERT <dbl>, WEIND <dbl>, PSU <dbl>, SSU <dbl>, WI1 <dbl>, WI2 <dbl>

1.2 Susenas KP 4.2

df42 <- read_csv("C:/Users/Muhammad Hafiz F/Downloads/2023 Maret JABAR - SUSENAS KP BP 4.2.csv")

## New names:
## Rows: 1051815 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): COICOP dbl (23): ...1, URUT, R101, R102, R105, R301, KODE, KLP, B42K3,
## B42K4, B42K5...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

head(df42)

## # A tibble: 6 × 24
##    ...1   URUT  R101  R102  R105  R301  KODE   KLP COICOP   B42K3  B42K4  B42K5
##   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>    <dbl>  <dbl>  <dbl>
## 1     0 500001    32     7     2     4   198     0 <NA>        0  965000 664000
## 2     1 500001    32     7     2     4   199     2 <NA>        1       0      0
## 3     2 500001    32     7     2     4   200   198 04221000    0  758000      0
## 4     3 500001    32     7     2     4   204   198 04310000    0       0 474000
## 5     4 500001    32     7     2     4   205     1 04510000  116.      0      0
## 6     5 500001    32     7     2     4   206   198 04510000    0   75000      0
## # ℹ 12 more variables: B42K3A <dbl>, B42K3B <dbl>, B42K3C <dbl>, B42K3D <dbl>,
## #   B42K3E <dbl>, SEBULAN <dbl>, WERT <dbl>, WEIND <dbl>, PSU <dbl>, SSU <dbl>,
## #   WI1 <dbl>, WI2 <dbl>

1.3 Susenas KOR

dfKor <- read_csv("C:/Users/Muhammad Hafiz F/Downloads/2023 Maret JABAR - SUSENAS KOR Rumah Tangga.csv")

## New names:
## Rows: 25890 Columns: 199
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (18): R2002_A, R2002_B, R2002_C, R2002_D, R2204C_A, R2204C_B, R2204C_C,... dbl
## (180): ...1, URUT, PSU, SSU, WI1, WI2, R101, R102, R105, NUINFORT, R1701... lgl
## (1): R2204C_F
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

head(dfKor)

## # A tibble: 6 × 199
##    ...1   URUT   PSU    SSU   WI1    WI2  R101  R102  R105 NUINFORT R1701 R1702
##   <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>
## 1     0 500001 12448 123442 12435 123427    32     7     2        2     5     5
## 2     1 500002 31373 311039 31360 311024    32    72     1        1     1     1
## 3     2 500003 12092 119908 12079 119893    32     6     2        2     5     5
## 4     3 500004 31135 308689 31122 308674    32    72     1        2     5     5
## 5     4 500005 33988 336798 33975 336783    32    77     1        1     5     5
## 6     5 500006 34062 337531 34049 337516    32    77     1        1     5     5
## # ℹ 187 more variables: R1703 <dbl>, R1704 <dbl>, R1705 <dbl>, R1706 <dbl>,
## #   R1707 <dbl>, R1708 <dbl>, NUINFORT1 <dbl>, R1801 <dbl>, R1802 <dbl>,
## #   R1803 <dbl>, R1804 <dbl>, R1805 <dbl>, R1806 <dbl>, R1807 <dbl>,
## #   R1808 <dbl>, R1809A <dbl>, R1809B <dbl>, R1809C <dbl>, R1809D <dbl>,
## #   R1809E <dbl>, R1810A <dbl>, R1810B <dbl>, R1811A <dbl>, R1811B <dbl>,
## #   R1812 <dbl>, R1813A <dbl>, R1813B <dbl>, R1813C <dbl>, R1813D <dbl>,
## #   R1813E <dbl>, R1814A <dbl>, R1814B <dbl>, R1815A <dbl>, R1815B <dbl>, …

2 Data cleaning and formatting

2.1 Susenas KP 4.3

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

df43 <- df43 %>% 
  filter(R101 == 32, R102 == 71) %>% 
  select(URUT, R101, R102, FOOD, NONFOOD, EXPEND)

head(df43)

## # A tibble: 6 × 6
##     URUT  R101  R102     FOOD   NONFOOD    EXPEND
##    <dbl> <dbl> <dbl>    <dbl>     <dbl>     <dbl>
## 1 500039    32    71 2770286.  2305333.  5075619.
## 2 500064    32    71  262714.   467083.   729798.
## 3 500165    32    71 9556029. 19716167. 29272195.
## 4 500198    32    71 1819971.  1340583.  3160555.
## 5 500202    32    71 1496143.   958667.  2454810.
## 6 500237    32    71 2315571.  4581667.  6897238.

df43 <- df43 %>% mutate(FOODPerc = FOOD / EXPEND)

head(df43)

## # A tibble: 6 × 7
##     URUT  R101  R102     FOOD   NONFOOD    EXPEND FOODPerc
##    <dbl> <dbl> <dbl>    <dbl>     <dbl>     <dbl>    <dbl>
## 1 500039    32    71 2770286.  2305333.  5075619.    0.546
## 2 500064    32    71  262714.   467083.   729798.    0.360
## 3 500165    32    71 9556029. 19716167. 29272195.    0.326
## 4 500198    32    71 1819971.  1340583.  3160555.    0.576
## 5 500202    32    71 1496143.   958667.  2454810.    0.609
## 6 500237    32    71 2315571.  4581667.  6897238.    0.336

2.2 Susenas KP 4.2

df42 <- df42 %>% 
  filter(R101 == 32, R102 == 71) %>% 
  select(URUT, R101, R102, B42K4)

head(df42)

## # A tibble: 6 × 4
##     URUT  R101  R102   B42K4
##    <dbl> <dbl> <dbl>   <dbl>
## 1 500039    32    71 1594000
## 2 500039    32    71       0
## 3 500039    32    71  731000
## 4 500039    32    71       0
## 5 500039    32    71  147000
## 6 500039    32    71       0

df42 <- df42 %>%
  group_by(URUT) %>%
  mutate(Sum_B42K4 = sum(B42K4, na.rm = TRUE)) %>%
  ungroup() %>%
  select(-B42K4) %>% 
  distinct()

head(df42)

## # A tibble: 6 × 4
##     URUT  R101  R102 Sum_B42K4
##    <dbl> <dbl> <dbl>     <dbl>
## 1 500039    32    71   3498000
## 2 500064    32    71    842000
## 3 500165    32    71  16912000
## 4 500198    32    71   2264000
## 5 500202    32    71   1488000
## 6 500237    32    71   5564000

2.3 Susenas KOR

dfKor <- dfKor %>% 
  filter(R101 == 32, R102 == 71) %>% 
  select(URUT, R101, R102, R2001K)

head(dfKor)

## # A tibble: 6 × 4
##     URUT  R101  R102 R2001K
##    <dbl> <dbl> <dbl>  <dbl>
## 1 500039    32    71      5
## 2 500064    32    71      5
## 3 500165    32    71      1
## 4 500198    32    71      5
## 5 500202    32    71      5
## 6 500237    32    71      1

3 Data merging

df <- df43 %>% 
  select(URUT, FOODPerc) %>% 
  left_join(df42 %>% select(URUT, Sum_B42K4), by = "URUT") %>% 
  left_join(dfKor %>% select(URUT, R2001K), by = "URUT")

head(df)

## # A tibble: 6 × 4
##     URUT FOODPerc Sum_B42K4 R2001K
##    <dbl>    <dbl>     <dbl>  <dbl>
## 1 500039    0.546   3498000      5
## 2 500064    0.360    842000      5
## 3 500165    0.326  16912000      1
## 4 500198    0.576   2264000      5
## 5 500202    0.609   1488000      5
## 6 500237    0.336   5564000      1

4 Data summary

4.1 Function to find mean, stdev, skewness, and kurtosis

library(moments)

stat_summary <- function(data, column) {
  if (!(column %in% colnames(data))) {
    stop("Column not found in dataframe")
  }
  
  col_data <- data[[column]]
  
  result <- list(
    Mean = mean(col_data, na.rm = TRUE),
    Standard_Deviation = sd(col_data, na.rm = TRUE),
    Skewness = skewness(col_data, na.rm = TRUE),
    Kurtosis = kurtosis(col_data, na.rm = TRUE)
  )
  
  return(result)
}

4.2 Statistical summary of total (nonfood) household expense within the last month

stat_summary(df, "Sum_B42K4")

## $Mean
## [1] 8169824
## 
## $Standard_Deviation
## [1] 20935096
## 
## $Skewness
## [1] 14.69717
## 
## $Kurtosis
## [1] 305.7992

4.3 Statistical summary of the propotion of household expense used for food

stat_summary(df, "FOODPerc")

## $Mean
## [1] 0.4859775
## 
## $Standard_Deviation
## [1] 0.1473592
## 
## $Skewness
## [1] -0.5708026
## 
## $Kurtosis
## [1] 2.914345

4.4 Statistical summary of total (nonfood) household expense within the last month, separated between car owners and non-car owners

4.4.1 Car owners

stat_summary(df %>% filter(R2001K == 1), "Sum_B42K4")

## $Mean
## [1] 27291186
## 
## $Standard_Deviation
## [1] 43264527
## 
## $Skewness
## [1] 7.852352
## 
## $Kurtosis
## [1] 78.93799

4.4.2 Non-car owners

stat_summary(df %>% filter(R2001K == 5), "Sum_B42K4")

## $Mean
## [1] 3974540
## 
## $Standard_Deviation
## [1] 5302518
## 
## $Skewness
## [1] 10.30801
## 
## $Kurtosis
## [1] 164.7758

Summary of Household Expenses in Bogor (Susenas KP, KOR – March 2023)

Muhammad Hafiz Fazli

2025-02-07

1 Input data

1.1 Susenas KP 4.3

1.2 Susenas KP 4.2

1.3 Susenas KOR

2 Data cleaning and formatting

2.1 Susenas KP 4.3

2.2 Susenas KP 4.2

2.3 Susenas KOR

3 Data merging

4 Data summary

4.1 Function to find mean, stdev, skewness, and kurtosis

4.2 Statistical summary of total (nonfood) household expense within the last month

4.3 Statistical summary of the propotion of household expense used for food

4.4 Statistical summary of total (nonfood) household expense within the last month, separated between car owners and non-car owners

4.4.1 Car owners

4.4.2 Non-car owners