| Tugas STA1232-Analisis Eksplorasi Data, IPB University |
library(readr)
# Load the SUSENAS "KP BP 4.3" CSV extract into `df43`. According to the
# console output echoed below, the file parses to 25,890 rows and 20 numeric
# (dbl) columns, and readr renames the unnamed first column to `...1`.
# NOTE(review): the path is absolute and user-specific, so it presumably only
# resolves on the author's machine; confirm the location before re-running.
df43 <- read_csv("C:/Users/Muhammad Hafiz F/Downloads/2023 Maret JABAR - SUSENAS KP BP 4.3.csv")
## New names:
## Rows: 25890 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (20): ...1, URUT, R101, R102, R105, R301, FOOD, NONFOOD, EXPEND, KAPITA,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6 × 20
## ...1 URUT R101 R102 R105 R301 FOOD NONFOOD EXPEND KAPITA KALORI_KAP
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 500001 32 7 2 4 2660400 2.30e6 4.96e6 1.24e6 2365.
## 2 1 500002 32 72 1 2 1108714. 5.25e5 1.63e6 8.17e5 2612.
## 3 2 500003 32 6 2 3 2413886. 1.40e6 3.81e6 1.27e6 2527.
## 4 3 500004 32 72 1 7 7770000 4.31e6 1.21e7 1.73e6 3656.
## 5 4 500005 32 77 1 3 4932557. 4.62e7 5.12e7 1.71e7 2331.
## 6 5 500006 32 77 1 2 3272143. 1.14e7 1.47e7 7.34e6 2331.
## # ℹ 9 more variables: PROTE_KAP <dbl>, LEMAK_KAP <dbl>, KARBO_KAP <dbl>,
## # WERT <dbl>, WEIND <dbl>, PSU <dbl>, SSU <dbl>, WI1 <dbl>, WI2 <dbl>
## New names:
## Rows: 1051815 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): COICOP dbl (23): ...1, URUT, R101, R102, R105, R301, KODE, KLP, B42K3,
## B42K4, B42K5...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6 × 24
## ...1 URUT R101 R102 R105 R301 KODE KLP COICOP B42K3 B42K4 B42K5
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 0 500001 32 7 2 4 198 0 <NA> 0 965000 664000
## 2 1 500001 32 7 2 4 199 2 <NA> 1 0 0
## 3 2 500001 32 7 2 4 200 198 04221000 0 758000 0
## 4 3 500001 32 7 2 4 204 198 04310000 0 0 474000
## 5 4 500001 32 7 2 4 205 1 04510000 116. 0 0
## 6 5 500001 32 7 2 4 206 198 04510000 0 75000 0
## # ℹ 12 more variables: B42K3A <dbl>, B42K3B <dbl>, B42K3C <dbl>, B42K3D <dbl>,
## # B42K3E <dbl>, SEBULAN <dbl>, WERT <dbl>, WEIND <dbl>, PSU <dbl>, SSU <dbl>,
## # WI1 <dbl>, WI2 <dbl>
# Load the SUSENAS "KOR Rumah Tangga" CSV extract into `dfKor`. According to
# the console output echoed below, the file parses to 25,890 rows and 199
# columns (18 chr, 180 dbl, 1 lgl), and readr renames the unnamed first column
# to `...1`.
# NOTE(review): the path is absolute and user-specific, so it presumably only
# resolves on the author's machine; confirm the location before re-running.
dfKor <- read_csv("C:/Users/Muhammad Hafiz F/Downloads/2023 Maret JABAR - SUSENAS KOR Rumah Tangga.csv")
## New names:
## Rows: 25890 Columns: 199
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (18): R2002_A, R2002_B, R2002_C, R2002_D, R2204C_A, R2204C_B, R2204C_C,... dbl
## (180): ...1, URUT, PSU, SSU, WI1, WI2, R101, R102, R105, NUINFORT, R1701... lgl
## (1): R2204C_F
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6 × 199
## ...1 URUT PSU SSU WI1 WI2 R101 R102 R105 NUINFORT R1701 R1702
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 500001 12448 123442 12435 123427 32 7 2 2 5 5
## 2 1 500002 31373 311039 31360 311024 32 72 1 1 1 1
## 3 2 500003 12092 119908 12079 119893 32 6 2 2 5 5
## 4 3 500004 31135 308689 31122 308674 32 72 1 2 5 5
## 5 4 500005 33988 336798 33975 336783 32 77 1 1 5 5
## 6 5 500006 34062 337531 34049 337516 32 77 1 1 5 5
## # ℹ 187 more variables: R1703 <dbl>, R1704 <dbl>, R1705 <dbl>, R1706 <dbl>,
## # R1707 <dbl>, R1708 <dbl>, NUINFORT1 <dbl>, R1801 <dbl>, R1802 <dbl>,
## # R1803 <dbl>, R1804 <dbl>, R1805 <dbl>, R1806 <dbl>, R1807 <dbl>,
## # R1808 <dbl>, R1809A <dbl>, R1809B <dbl>, R1809C <dbl>, R1809D <dbl>,
## # R1809E <dbl>, R1810A <dbl>, R1810B <dbl>, R1811A <dbl>, R1811B <dbl>,
## # R1812 <dbl>, R1813A <dbl>, R1813B <dbl>, R1813C <dbl>, R1813D <dbl>,
## # R1813E <dbl>, R1814A <dbl>, R1814B <dbl>, R1815A <dbl>, R1815B <dbl>, …
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the records coded R101 == 32 and R102 == 71.
df43 <- filter(df43, R101 == 32 & R102 == 71)
# Narrow the table to the record identifier, the two area codes and the three
# expenditure columns used later on.
df43 <- select(df43, URUT, R101, R102, FOOD, NONFOOD, EXPEND)
# Preview the first rows of the reduced table.
head(df43)
## # A tibble: 6 × 6
## URUT R101 R102 FOOD NONFOOD EXPEND
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 500039 32 71 2770286. 2305333. 5075619.
## 2 500064 32 71 262714. 467083. 729798.
## 3 500165 32 71 9556029. 19716167. 29272195.
## 4 500198 32 71 1819971. 1340583. 3160555.
## 5 500202 32 71 1496143. 958667. 2454810.
## 6 500237 32 71 2315571. 4581667. 6897238.
## # A tibble: 6 × 7
## URUT R101 R102 FOOD NONFOOD EXPEND FOODPerc
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 500039 32 71 2770286. 2305333. 5075619. 0.546
## 2 500064 32 71 262714. 467083. 729798. 0.360
## 3 500165 32 71 9556029. 19716167. 29272195. 0.326
## 4 500198 32 71 1819971. 1340583. 3160555. 0.576
## 5 500202 32 71 1496143. 958667. 2454810. 0.609
## 6 500237 32 71 2315571. 4581667. 6897238. 0.336
## # A tibble: 6 × 4
## URUT R101 R102 B42K4
## <dbl> <dbl> <dbl> <dbl>
## 1 500039 32 71 1594000
## 2 500039 32 71 0
## 3 500039 32 71 731000
## 4 500039 32 71 0
## 5 500039 32 71 147000
## 6 500039 32 71 0
# Collapse the item-level table to one row per URUT / R101 / R102 combination,
# totalling B42K4 (missing values ignored) into Sum_B42K4.
# summarise() aggregates in a single step; the previous
# mutate() + select() + distinct() sequence first copied the group total onto
# every item row and then de-duplicated the result.
# NOTE(review): assumes R101 and R102 are constant within a URUT and that df42
# holds only URUT, R101, R102 and B42K4 at this point (as the preview printed
# before this chunk suggests) - confirm. Rows come back ordered by the
# grouping keys.
df42 <- df42 %>%
  group_by(URUT, R101, R102) %>%
  summarise(Sum_B42K4 = sum(B42K4, na.rm = TRUE), .groups = "drop")
# Preview the aggregated table.
head(df42)
## # A tibble: 6 × 4
## URUT R101 R102 Sum_B42K4
## <dbl> <dbl> <dbl> <dbl>
## 1 500039 32 71 3498000
## 2 500064 32 71 842000
## 3 500165 32 71 16912000
## 4 500198 32 71 2264000
## 5 500202 32 71 1488000
## 6 500237 32 71 5564000
## # A tibble: 6 × 4
## URUT R101 R102 R2001K
## <dbl> <dbl> <dbl> <dbl>
## 1 500039 32 71 5
## 2 500064 32 71 5
## 3 500165 32 71 1
## 4 500198 32 71 5
## 5 500202 32 71 5
## 6 500237 32 71 1
# Assemble the analysis table keyed on URUT, one source at a time.
# Start from the FOODPerc column of df43 ...
df <- select(df43, URUT, FOODPerc)
# ... attach the per-URUT B42K4 total from df42 ...
df <- left_join(df, select(df42, URUT, Sum_B42K4), by = "URUT")
# ... and finally attach R2001K from dfKor. Left joins keep every df43 row.
df <- left_join(df, select(dfKor, URUT, R2001K), by = "URUT")
# Preview the combined table.
head(df)
## # A tibble: 6 × 4
## URUT FOODPerc Sum_B42K4 R2001K
## <dbl> <dbl> <dbl> <dbl>
## 1 500039 0.546 3498000 5
## 2 500064 0.360 842000 5
## 3 500165 0.326 16912000 1
## 4 500198 0.576 2264000 5
## 5 500202 0.609 1488000 5
## 6 500237 0.336 5564000 1
library(moments)
#' Summarise the distribution of one data-frame column
#'
#' Computes the mean, standard deviation, skewness and kurtosis of a single
#' column, dropping missing values before each computation.
#'
#' @param data A data frame (or tibble) that contains the column.
#' @param column A single string naming the column to summarise.
#' @return A named list with elements `Mean`, `Standard_Deviation`,
#'   `Skewness` and `Kurtosis`.
stat_summary <- function(data, column) {
  # Reject vectors, NA and non-character input up front: `if ()` below needs a
  # length-one condition, and only a name can match `colnames(data)`.
  if (!is.character(column) || length(column) != 1L || is.na(column)) {
    stop("`column` must be a single column name", call. = FALSE)
  }
  if (!(column %in% colnames(data))) {
    stop("Column not found in dataframe", call. = FALSE)
  }

  values <- data[[column]]
  # Fail with a clear message instead of an arithmetic error raised from
  # inside the moment calculations.
  if (!is.numeric(values) && !is.logical(values)) {
    stop("Column `", column, "` must be numeric or logical", call. = FALSE)
  }

  # Namespace-qualify the moment functions so the result does not depend on
  # which attached package currently provides `skewness` / `kurtosis`.
  list(
    Mean = mean(values, na.rm = TRUE),
    Standard_Deviation = sd(values, na.rm = TRUE),
    Skewness = moments::skewness(values, na.rm = TRUE),
    Kurtosis = moments::kurtosis(values, na.rm = TRUE)
  )
}
## $Mean
## [1] 8169824
##
## $Standard_Deviation
## [1] 20935096
##
## $Skewness
## [1] 14.69717
##
## $Kurtosis
## [1] 305.7992
## $Mean
## [1] 0.4859775
##
## $Standard_Deviation
## [1] 0.1473592
##
## $Skewness
## [1] -0.5708026
##
## $Kurtosis
## [1] 2.914345
## $Mean
## [1] 27291186
##
## $Standard_Deviation
## [1] 43264527
##
## $Skewness
## [1] 7.852352
##
## $Kurtosis
## [1] 78.93799