Library

library(readr)
## Warning: package 'readr' was built under R version 4.3.2
library(tidyverse) 
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)
library(GGally) 
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(dplyr)
library(treemap)
## Warning: package 'treemap' was built under R version 4.3.2
library(treemapify)
## Warning: package 'treemapify' was built under R version 4.3.2

Data

# Read the SUSENAS household CSV (per its file name: March 2023, JABAR) into a
# tibble named `dataset`, which every chart below uses as its data source.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact location exists.
dataset <- read_csv(
  file = "D:/Kuliah/Statistika Dan Sains Data/Semester 4/Visualisasi Data/Tugas/Project/Project SUSENAS/2023 Maret JABAR - SUSENAS KOR Rumah Tangga.csv"
)
## New names:
## Rows: 25890 Columns: 199
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (18): R2002_A, R2002_B, R2002_C, R2002_D, R2204C_A, R2204C_B, R2204C_C,... dbl
## (180): ...1, URUT, PSU, SSU, WI1, WI2, R101, R102, R105, NUINFORT, R1701... lgl
## (1): R2204C_F
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Auto-print the tibble to inspect its dimensions, column types and first rows.
dataset
## # A tibble: 25,890 × 199
##     ...1   URUT   PSU    SSU   WI1    WI2  R101  R102  R105 NUINFORT R1701 R1702
##    <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>
##  1     0 500001 12448 123442 12435 123427    32     7     2        2     5     5
##  2     1 500002 31373 311039 31360 311024    32    72     1        1     1     1
##  3     2 500003 12092 119908 12079 119893    32     6     2        2     5     5
##  4     3 500004 31135 308689 31122 308674    32    72     1        2     5     5
##  5     4 500005 33988 336798 33975 336783    32    77     1        1     5     5
##  6     5 500006 34062 337531 34049 337516    32    77     1        1     5     5
##  7     6 500007 33428 331261 33415 331246    32    75     1        1     1     1
##  8     7 500008 18431 182888 18418 182873    32    11     2        2     1     1
##  9     8 500009 18089 179477 18076 179462    32    10     2        2     1     5
## 10     9 500010   114   1020   101   1005    32     1     1        2     5     5
## # ℹ 25,880 more rows
## # ℹ 187 more variables: R1703 <dbl>, R1704 <dbl>, R1705 <dbl>, R1706 <dbl>,
## #   R1707 <dbl>, R1708 <dbl>, NUINFORT1 <dbl>, R1801 <dbl>, R1802 <dbl>,
## #   R1803 <dbl>, R1804 <dbl>, R1805 <dbl>, R1806 <dbl>, R1807 <dbl>,
## #   R1808 <dbl>, R1809A <dbl>, R1809B <dbl>, R1809C <dbl>, R1809D <dbl>,
## #   R1809E <dbl>, R1810A <dbl>, R1810B <dbl>, R1811A <dbl>, R1811B <dbl>,
## #   R1812 <dbl>, R1813A <dbl>, R1813B <dbl>, R1813C <dbl>, R1813D <dbl>, …

Visualisasi Besaran Data

Data Bar Chart

# Recode the numeric codes stored in R1802 into text labels, in place.
# The column becomes character; codes without an entry in the lookup table
# (and NA) are kept as they are. `local()` keeps the helpers out of the
# global environment.
dataset$R1802 <- local({
  code_labels <- c(
    "1" = "milik sendiri",
    "2" = "sewa",
    "3" = "bebas sewa",
    "4" = "rumah dinas",
    "5" = "lainnya"
  )
  values <- as.character(dataset$R1802)
  known <- values %in% names(code_labels)
  values[known] <- unname(code_labels[values[known]])
  values
})

# Horizontal bar chart with one bar per R1802 label; bar length is the number
# of rows carrying that label.
ggplot(data = dataset, mapping = aes(x = R1802)) +
  geom_bar(width = 0.3, fill = "black") +
  coord_flip() +
  theme_get() +
  ggtitle("Surat Kepemilikan Rumah") +
  xlab("Bentuk Kepemilikan") +
  ylab("Jumlah")

Data Lollipop Chart

# Recode the numeric codes stored in R1817 into text labels, in place.
# The column becomes character; codes without an entry in the lookup table
# (and NA) are kept as they are. `local()` keeps the helpers out of the
# global environment.
dataset$R1817 <- local({
  code_labels <- c(
    "0" = "Tidak Masak",
    "1" = "Listrik",
    "2" = "Elpiji 5KG",
    "3" = "Elpiji 12KG",
    "4" = "Elpiji 3KG",
    "5" = "Gas Kota",
    "6" = "Bio Gas",
    "7" = "Minyak Tanah",
    "8" = "Bricket",
    "9" = "Arang",
    "10" = "Kayu Bakar",
    "11" = "lainnya"
  )
  values <- as.character(dataset$R1817)
  known <- values %in% names(code_labels)
  values[known] <- unname(code_labels[values[known]])
  values
})

# Lollipop chart: one stem (segment) and one head (point) per R1817 label,
# ordered by the number of rows per label. The reordered factor is computed
# once and shared by both layers.
dataset %>%
  count(R1817) %>%
  mutate(fuel = fct_reorder(as.factor(R1817), n)) %>%
  ggplot(mapping = aes(x = fuel)) +
  geom_segment(
    aes(xend = fuel, y = 0, yend = n),
    color = "RED",
    linewidth = 2
  ) +
  geom_point(aes(y = n), color = "BLUE", size = 2) +
  # No padding on the count axis, so the stems start at the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(
    x = "Jenis bahan bakar untuk memasak",
    y = "Banyaknya Pengguna"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

Visualisasi Sebaran Data

Data Histogram

# Histogram of R1804 (labelled "Luas Tanah") for values between 0 and 300.
#
# The previous version relied on `xlim(0, 300)` with the default 30 bins.
# `xlim()` discards out-of-range rows before binning, and because the default
# bins straddle the limits, the first and last bars were removed as well
# (the "Removed 2 rows ... (`geom_bar()`)" warning). Here the range is applied
# with an explicit filter, and the bins are anchored at 0 with a fixed width so
# every bin lies inside the displayed range.
dataset %>%
  # `filter()` also drops rows where R1804 is NA.
  filter(between(R1804, 0, 300)) %>%
  ggplot(aes(x = R1804)) +
  geom_histogram(
    binwidth = 10, # 30 bins of width 10 across 0-300
    boundary = 0,  # bin edges fall on 0, 10, 20, ...
    fill = "pink",
    color = "black"
  ) +
  labs(
    title = "Histogram Sebaran Luas Tanah",
    x = "Luas Tanah",
    y = "Jumlah"
  ) +
  # Fix the visible axis range without discarding any bars.
  coord_cartesian(xlim = c(0, 300))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 144 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

Data Density Plot

# Density plot of R2208BI4 (labelled "Uang Bantuan") over the range 1 to
# 1,000,000. Values outside the limits, and NA, are dropped by `xlim()` before
# the density is estimated.
#
# Changes from the previous version:
# - the `fill = "Basement"` aesthetic was removed: the constant
#   `fill = "green"` on the same layer overrides it, so it had no effect;
# - the y axis shows a density estimate, not a count of recipients, so its
#   label now says "Kepadatan" instead of "Total Penerima".
ggplot(dataset, aes(x = R2208BI4)) +
  geom_density(fill = "green", color = "black", alpha = 0.8) +
  labs(
    title = "Density Plot Sebaran Nilai Bantuan Diterima",
    x = "Uang Bantuan",
    y = "Kepadatan"
  ) +
  xlim(1, 1000000) +
  theme_light()
## Warning: Removed 22205 rows containing non-finite outside the scale range
## (`stat_density()`).

Data Pie Chart

# Count the rows per non-zero R1901B code and keep the (at most) ten most
# frequent codes. The result `df` has two columns: R1901B (factor) and n.
#
# `slice_max()` replaces the superseded `top_n()`; like `top_n()` it keeps
# ties and it already returns rows in descending order of `n`, so no separate
# `arrange()` is needed. `filter(R1901B != 0)` also drops rows where R1901B
# is NA.
df <- dataset %>%
  filter(R1901B != 0) %>%
  mutate(R1901B = as.factor(R1901B)) %>%
  count(R1901B) %>%
  slice_max(n, n = 10)

# Pie chart: a single stacked column drawn in polar coordinates, one slice per
# R1901B code, each slice annotated with its count.
# NOTE(review): the "Set1" brewer palette provides at most 9 colours, while up
# to 10 categories can be kept above - confirm R1901B has no more than 9 codes.
ggplot(df, aes(x = "", y = n, fill = R1901B)) +
  geom_col(width = 1) +
  # Place each count label in the middle of its slice.
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Set1") +
  theme_void() +
  theme(legend.position = "right") +
  labs(title = "Melakukan Kredit", fill = "kredit")

Data Treemap

# Treemap of R1810A, subdivided by R102; tile area is the number of rows in
# each (R1810A, R102) combination.
#
# The previous version passed `vSize = "R101"`. In the rows previewed in this
# file R101 is 32 everywhere, so it looks like a region code rather than a
# quantity, and summing it only produced areas proportional to row counts by
# accident. Counting rows explicitly gives the same proportions without
# depending on that column.
# NOTE(review): assumes R101 is constant across all rows - confirm; if it is
# not, the tile areas here intentionally differ from the old plot.
dataset %>%
  count(R1810A, R102, name = "n") %>%
  as.data.frame() %>%
  treemap(
    index = c("R1810A", "R102"),
    vSize = "n",
    draw = TRUE,
    title = "Treemap: Sumber Utama Air Masyarakat",
    fontsize.title = 20,
    fontsize.labels = 12,
    fontcolor.labels = "white"
  )