# Library

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(ggridges)
library(GGally) 
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(plotly) 
## Warning: package 'plotly' was built under R version 4.3.2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
library(wesanderson)
## Warning: package 'wesanderson' was built under R version 4.3.2
library(ggmosaic)
## Warning: package 'ggmosaic' was built under R version 4.3.2
## 
## Attaching package: 'ggmosaic'
## 
## The following object is masked from 'package:GGally':
## 
##     happy
library(treemap)
## Warning: package 'treemap' was built under R version 4.3.2
library(treemapify)
## Warning: package 'treemapify' was built under R version 4.3.2

# Read Data

# Import the Excel workbook into `data_house`.
# NOTE(review): the path is absolute and points into one user's Downloads
# folder, so the script only runs unchanged on that machine.
data_house <- readxl::read_xlsx(
  path = "C:\\Users\\Muhammad Rizqa Salas\\Downloads\\Data pendidikan terakhir Jawa Barat.xlsx"
)

# Show the column names, types and first values of the imported table.
utils::str(data_house)
## tibble [84,688 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Pendidikan: chr [1:84688] "SD" "D III" "SMK" "SD" ...
##  $ Kota      : chr [1:84688] "Kab. Ciamis" "Kab. Ciamis" "Kab. Ciamis" "Kab. Ciamis" ...
##  $ Rokok     : num [1:84688] 0 0 0 0 0 35 49 0 0 112 ...

# Visualisasi Besaran

# Horizontal bar chart of the most frequent Pendidikan values.
# Rows are counted per Pendidikan, the ten largest counts are kept
# (slice_max() keeps ties by default), and the categories are ordered by
# their count so the bars are sorted after coord_flip().
data_house %>%
  count(Pendidikan) %>%
  slice_max(order_by = n, n = 10) %>%
  mutate(Pendidikan = fct_reorder(factor(Pendidikan), n)) %>%
  ggplot(aes(x = Pendidikan, y = n)) +
  geom_col(fill = "maroon", width = 0.4) +
  # Remove the padding so the bars start directly at the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(
    title = "Top 10 Pendidikan Terakhir Masyarakat Jawa Barat",
    x = "",
    y = "jumlah"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = .5, size = 20))

# Subset of `data_house` restricted to rows whose Pendidikan is "SD",
# keeping only the Pendidikan and Kota columns.
filtered_data <- data_house %>%
  filter(Pendidikan == "SD") %>%
  select(Pendidikan, Kota)

# Show the structure of the subset.
utils::str(filtered_data)
## tibble [30,953 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Pendidikan: chr [1:30953] "SD" "SD" "SD" "SD" ...
##  $ Kota      : chr [1:30953] "Kab. Ciamis" "Kab. Ciamis" "Kota Sukabumi" "Kota Sukabumi" ...
# Lollipop chart of the Kota values with the most rows in `filtered_data`.
# Rows are counted per Kota and the ten largest counts are kept (slice_max()
# keeps ties by default, so more than ten rows are possible).
# The display order is determined solely by fct_reorder(); sorting the rows
# with arrange() has no effect on the plot, so that step is not used. The
# reordered factor is computed once and shared by the segment and point layers.
filtered_data %>%
  count(Kota) %>%
  slice_max(order_by = n, n = 10) %>%
  mutate(Kota = fct_reorder(as.factor(Kota), n)) %>%
  ggplot(aes(x = Kota)) +
  # Stem of each lollipop: from 0 up to the count.
  geom_segment(aes(xend = Kota, y = 0, yend = n), color = "steelblue") +
  # Head of each lollipop at the count.
  geom_point(aes(y = n), color = "orange", size = 2) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  ggtitle("Top 10 Kabupaten/Kota dengan lulusan SD Terbanyak") +
  xlab("") +
  ylab("Jumlah") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# Visualisasi Sebaran

# Keep only rows with a positive Rokok value.
# NOTE: this overwrites `data_house`, so every later use of `data_house` in
# this script (the reshape for the bar facets and the treemap) sees only the
# rows with Rokok > 0.
data_house <- data_house %>% filter(Rokok > 0)

# Single boxplot of the remaining Rokok values.
ggplot(data_house) +
  geom_boxplot(aes(y = Rokok)) +
  labs(title = "Boxplot Sebaran Jumlah Konsumsi Rokok dalam Seminggu")

# Stack the two categorical columns into long form: `name` holds the source
# column ("Pendidikan" or "Kota") and `value` holds the category itself.
data_long <- data_house %>%
  tidyr::pivot_longer(
    cols = c(Pendidikan, Kota),
    names_to = "name",
    values_to = "value"
  )

# One bar-chart panel per source column; free scales let each panel use its
# own category axis and count range.
combined_plot <- ggplot(data_long, aes(x = value, fill = name)) +
  geom_bar() +
  facet_wrap(~ name, scales = "free") +
  scale_fill_manual(values = wes_palette("Darjeeling1", n = 2)) +
  ggtitle("Sebaran Seluruh Kolom Kategorikal") +
  xlab("Kategori") +
  ylab("Frekuensi") +
  theme_minimal()

# Draw the stored plot object.
print(combined_plot)

# Visualisasi Komposisi

# Two-level treemap of `data_house`: rectangles are grouped by Pendidikan and
# then by Kota, with their area taken from the Rokok column.
# `data_house` at this point contains only rows with Rokok > 0 (filtered
# earlier in the script).
treemap(
  data_house,
  index = c("Pendidikan", "Kota"),
  vSize = "Rokok",
  draw = TRUE,
  # Title typo fixed: "Terkahir" -> "Terakhir".
  title = "Treemap: Konsumsi Rokok/minggu Berdasarkan Pendidikan Terakhir dan Wilayah",
  fontsize.title = 20,
  fontsize.labels = 12,
  fontcolor.labels = "white"
)

# Hand-entered counts for four education categories, used by the donut chart.
# NOTE(review): these figures are hard-coded rather than derived from
# `data_house`; the SD value (30990) differs from the 30,953 SD rows reported
# by str(filtered_data) earlier in this script -- confirm the intended source.
data <- data.frame(
  category = c("SD", "SMA", "SMP", "S I"),
  count = c(30990, 14170, 13902, 5875)
)

# Share of the total for each category.
data$fraction <- data$count / sum(data$count)
# Cumulative share: the upper edge of each category's band.
data$ymax <- cumsum(data$fraction)
# Lower edge of each band is the previous band's upper edge (0 for the first).
data$ymin <- c(0, head(data$ymax, n = -1))
 
# Donut chart built from `data`: each category is a rectangle spanning
# x in [3, 4] and y in [ymin, ymax]; mapping y to the angle bends the
# rectangles into ring segments, and starting the x axis at 2 leaves the
# hole in the middle.
ggplot(data) +
  geom_rect(
    aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = category)
  ) +
  coord_polar(theta = "y") +
  xlim(2, 4) +
  theme_classic()