library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(ggridges)
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
library(wesanderson)
## Warning: package 'wesanderson' was built under R version 4.3.2
library(ggmosaic)
## Warning: package 'ggmosaic' was built under R version 4.3.2
##
## Attaching package: 'ggmosaic'
##
## The following object is masked from 'package:GGally':
##
## happy
library(treemap)
## Warning: package 'treemap' was built under R version 4.3.2
library(treemapify)
## Warning: package 'treemapify' was built under R version 4.3.2
# Load the "Data pendidikan terakhir Jawa Barat" workbook into a tibble.
# The path is absolute and machine-specific, so it has to be adjusted before
# the script is run on another computer.
data_house <- readxl::read_xlsx(
  path = "C:\\Users\\Muhammad Rizqa Salas\\Downloads\\Data pendidikan terakhir Jawa Barat.xlsx"
)

# Print the column names, column types, and a preview of the values.
str(object = data_house)
## tibble [84,688 × 3] (S3: tbl_df/tbl/data.frame)
## $ Pendidikan: chr [1:84688] "SD" "D III" "SMK" "SD" ...
## $ Kota : chr [1:84688] "Kab. Ciamis" "Kab. Ciamis" "Kab. Ciamis" "Kab. Ciamis" ...
## $ Rokok : num [1:84688] 0 0 0 0 0 35 49 0 0 112 ...
# Horizontal bar chart of the most frequent `Pendidikan` values.
# slice_max() keeps the 10 largest counts (more if counts tie at the cut-off).
data_house %>%
  count(Pendidikan) %>%
  slice_max(order_by = n, n = 10) %>%
  # Order the factor levels by count so the bars are sorted by frequency.
  mutate(Pendidikan = fct_reorder(as.factor(Pendidikan), n)) %>%
  ggplot(mapping = aes(x = Pendidikan, y = n)) +
  geom_col(fill = "maroon", width = 0.4) +
  # Drop the default axis padding so the bars start right at the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  # Flip the axes so the category labels are read horizontally.
  coord_flip() +
  labs(
    title = "Top 10 Pendidikan Terakhir Masyarakat Jawa Barat",
    x = "",
    y = "jumlah"
  ) +
  theme_classic() +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
# Keep the `Pendidikan` and `Kota` columns for the rows whose `Pendidikan`
# value is "SD".
filtered_data <- data_house %>%
  select(Pendidikan, Kota) %>%
  filter(Pendidikan == "SD")

# Print the structure of the subset to check its size and columns.
str(object = filtered_data)
## tibble [30,953 × 2] (S3: tbl_df/tbl/data.frame)
## $ Pendidikan: chr [1:30953] "SD" "SD" "SD" "SD" ...
## $ Kota : chr [1:30953] "Kab. Ciamis" "Kab. Ciamis" "Kota Sukabumi" "Kota Sukabumi" ...
# Lollipop chart of the `Kota` values with the most "SD" rows.
# slice_max() keeps the 10 largest counts (more if counts tie at the cut-off).
filtered_data %>%
  count(Kota) %>%
  slice_max(order_by = n, n = 10) %>%
  # The axis order is taken from the factor levels, not from the row order,
  # so the factor is reordered once here and reused by every layer.
  # Pass `.desc = TRUE` to fct_reorder() to reverse the order.
  mutate(Kota = fct_reorder(as.factor(Kota), n)) %>%
  ggplot(aes(x = Kota, y = n)) +
  # Stem of each lollipop, drawn from 0 up to the count.
  geom_segment(aes(xend = Kota, y = 0, yend = n), color = "steelblue") +
  # Head of each lollipop, placed at the count.
  geom_point(color = "orange", size = 2) +
  # Drop the default axis padding so the stems start right at the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  ggtitle("Top 10 Kabupaten/Kota dengan lulusan SD Terbanyak") +
  xlab("") +
  ylab("Jumlah") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
# Distribution visualization
# Keep only the rows with a positive `Rokok` value. This overwrites
# `data_house`, so every later step that uses `data_house` works on the
# filtered rows only.
data_house <- data_house %>%
  filter(Rokok > 0)

# Boxplot of the remaining `Rokok` values.
ggplot(data_house) +
  geom_boxplot(aes(y = Rokok)) +
  labs(title = "Boxplot Sebaran Jumlah Konsumsi Rokok dalam Seminggu")
# Stack the two categorical columns into long form: `name` holds the source
# column ("Pendidikan" or "Kota") and `value` holds the category itself.
data_long <- data_house %>%
  tidyr::pivot_longer(cols = c(Pendidikan, Kota))

# One bar-chart panel per source column. Each panel gets its own axes
# (scales = "free") and its own fill colour from the "Darjeeling1" palette.
combined_plot <- ggplot(
  data = data_long,
  mapping = aes(x = value, fill = name)
) +
  geom_bar() +
  facet_wrap(vars(name), scales = "free") +
  scale_fill_manual(values = wes_palette(name = "Darjeeling1", n = 2)) +
  labs(
    title = "Sebaran Seluruh Kolom Kategorikal",
    x = "Kategori",
    y = "Frekuensi"
  ) +
  theme_minimal()

# Render the stored plot object.
print(combined_plot)
# Treemap of `Rokok`, with tiles nested by `Pendidikan` and then by `Kota`.
# The title spelling is corrected from "Terkahir" to "Terakhir".
treemap(
  data_house,
  index = c("Pendidikan", "Kota"),
  vSize = "Rokok",
  draw = TRUE,
  title = "Treemap: Konsumsi Rokok/minggu Berdasarkan Pendidikan Terakhir dan Wilayah",
  fontsize.title = 20,
  fontsize.labels = 12,
  fontcolor.labels = "white"
)
# Donut chart of four education categories built from hard-coded counts.
# NOTE(review): the "SD" count here (30990) differs from the 30,953 "SD" rows
# reported by str(filtered_data) earlier in this script -- confirm these
# figures against the data before relying on the chart.
data <- data.frame(
  category = c("SD", "SMA", "SMP", "S I"),
  count = c(30990, 14170, 13902, 5875)
)

# Share of each category in the total.
data[["fraction"]] <- prop.table(data[["count"]])
# Each ring segment ends at the running total of the shares ...
data[["ymax"]] <- cumsum(data[["fraction"]])
# ... and starts where the previous segment ended (the first starts at 0).
data[["ymin"]] <- c(0, data[["ymax"]][-nrow(data)])

# Rectangles spanning x in [3, 4] become ring segments in polar coordinates;
# starting the x axis at 2 leaves the hole in the middle of the donut.
ggplot(
  data = data,
  mapping = aes(ymin = ymin, ymax = ymax, xmin = 3, xmax = 4, fill = category)
) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(2, 4) +
  theme_classic()