## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
## Loading required package: viridisLite
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
Data bersumber dari https://github.com/mwaskom/seaborn-data/blob/master/tips.csv
# Load the tips dataset and re-express `tip` as a percentage of `total_bill`,
# rounded to one decimal place. `tip` is overwritten in place, so every later
# use of `data$tip` is a percentage, not the raw amount.
data <- read.table(
  "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv",
  header = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  sep = ","
) %>%
  mutate(tip = round(tip / total_bill * 100, 1))
data
Berikut banyaknya pengunjung restoran (n) dan median
dari tip (dalam persen terhadap total tagihan) yang diterima staf
restoran berdasarkan hari (day) dan jenis kelamin (sex)
pengunjung yang datang.
# Count the rows and compute the median of `tip` for every sex/day
# combination. At this point `tip` is the percentage computed when `data`
# was loaded.
counts <- data %>%
  group_by(sex, day) %>%
  summarize(
    n = n(),
    median = median(tip),
    # Return an ungrouped result. Without this, summarize() keeps the output
    # grouped by `sex` and prints a message about it.
    .groups = "drop"
  )
Plotting data dengan boxplot masing-masing hari
(day) dan jenis kelamin (sex).
# grouped
# Boxplot of tip (%) by sex, with one box per day, the individual
# observations jittered on top, and the group size printed just below each
# group's median (taken from `counts`).
data %>%
  # Fix the x-axis order explicitly. The earlier fct_reorder() call was
  # removed because this line overwrote its result.
  mutate(sex = factor(sex, levels = c("Male", "Female"))) %>%
  ggplot(aes(fill = day, y = tip, x = sex)) +
  geom_boxplot(
    # ggplot2 warns that it cannot preserve total widths when
    # `varwidth = TRUE`, so request "single" explicitly.
    position = position_dodge2(preserve = "single"),
    alpha = 0.5,
    outlier.colour = "transparent", # hidden: the raw points are drawn below
    varwidth = TRUE
  ) +
  geom_point(
    color = "green",
    size = 1,
    # `width` is not a geom_point() parameter and was ignored with a
    # warning; the jitter width belongs to the position adjustment.
    position = position_jitterdodge(jitter.width = 0.1),
    alpha = 0.4
  ) +
  scale_fill_viridis(discrete = TRUE, name = "") +
  geom_text(
    data = counts,
    aes(label = paste0("n: ", n), y = median - 2),
    position = position_dodge(1),
    hjust = 0.5
  ) +
  theme_ipsum() +
  xlab("") +
  ylab("Tip (%)") +
  # Zoom the axis instead of using ylim(): ylim() discards rows outside the
  # range before the boxplot statistics are computed, so the boxes would
  # disagree with the n/median taken from `counts`.
  coord_cartesian(ylim = c(0, 35))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
Dari boxplot di atas, terlihat bahwa sebaran
tip (dalam persen) yang diterima staf restoran dari pengunjung perempuan
maupun laki-laki semakin besar saat akhir pekan (weekend). Pengunjung
perempuan memberikan tip lebih banyak pada hari Jumat,
sedangkan pengunjung laki-laki lebih banyak memberikan tip
pada hari Minggu.
Data bersumber dari : https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/4_ThreeNum.csv.
# data_pop
# Load the example dataset and give its five columns fixed names.
data_pop <- read.table(
  "https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/4_ThreeNum.csv",
  header = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  sep = ","
)
# Overwrite every gdpPercap equal to -1 with 1. which() drops NA comparisons,
# so rows with a missing gdpPercap are left untouched.
# NOTE(review): -1 looks like a placeholder value, and 1 is presumably chosen
# so the column stays positive for the area-based treemap further down;
# confirm that 1 (rather than NA) is the intended replacement.
# This runs before the rename, so it relies on the file's own header already
# naming the column `gdpPercap`.
data_pop[which(data_pop$gdpPercap == -1), "gdpPercap"] <- 1
# Columns are renamed by position.
colnames(data_pop) <- c("Country", "Continent", "lifeExp", "pop", "gdpPercap")
data_pop
Lollipop plot pada dasarnya adalah barplot, di mana
bar diubah menjadi garis dan titik. Berikut lollipop
plot berdasarkan variabel gdpPercap (produk domestik
bruto per kapita) untuk 50 negara (Country) dengan gdpPercap tertinggi.
# lollipop
# Lollipop chart of gdpPercap for the 50 rows with the largest gdpPercap,
# ordered by Continent and, within each continent, by gdpPercap.
data_pop %>%
  filter(!is.na(gdpPercap)) %>%
  # Keep the 50 largest values: sort ascending, then take the last 50 rows.
  arrange(gdpPercap) %>%
  tail(50) %>%
  arrange(Continent, gdpPercap) %>%
  # Freeze the current row order as the factor level order so the axis follows
  # the sort above instead of alphabetical order. fct_inorder() yields the
  # same levels as factor(Country, Country) but does not fail on a duplicated
  # name. The no-op `mutate(gdpPercap)` step that followed was removed.
  mutate(Country = fct_inorder(Country)) %>%
  ggplot(aes(x = Country, y = gdpPercap, color = Continent)) +
  # Stem of each lollipop, from 0 up to the value.
  geom_segment(
    aes(x = Country, xend = Country, y = 0, yend = gdpPercap),
    color = "grey"
  ) +
  geom_point(size = 3) +
  scale_color_viridis(discrete = TRUE) +
  # Flip so the country names are on the vertical axis.
  coord_flip() +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  xlab("") +
  ylab("gdpPercap (USD)")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
Plot ini menunjukkan hubungan antara variabel numerik dan variabel
kategori. Dari hasil tersebut terlihat bahwa setiap warna mewakili
benua yang berbeda. Diketahui bahwa negara yang memperoleh
gdpPercap (USD) tertinggi yaitu negara Norway. Selanjutnya
yaitu Kuwait dan Singapore. Sedangkan negara yang memperoleh
gdpPercap (USD) terendah di antara 50 negara yang ditampilkan yaitu Mauritius.
Treemap merupakan visualisasi data dalam bentuk
persegi panjang (rectangle) berwarna-warni yang mewakili kategori tertentu dan
ukurannya menggambarkan proporsi nilai yang berkaitan dengan kategori
tersebut. Berikut treemap untuk pengelompokan benua
berdasarkan warna pada setiap negara dari hasil variabel
gdpPercap (USD).
# treemap
library(treemap)
# Treemap of gdpPercap: one rectangle per country, nested inside its
# continent. The call's return value is stored in `p`.
p <- treemap(
  data_pop,
  # Data: the hierarchy runs from the coarse category to the fine one. The
  # numeric columns lifeExp and pop were dropped from `index` because they
  # are measurements, not grouping levels.
  index = c("Continent", "Country"),
  vSize = "gdpPercap", # rectangle area
  type = "index",      # colour follows the index variables, not a value
  # Main
  title = "",
  palette = "Dark2",
  # Borders: one entry per index level (Continent, Country)
  border.col = c("black", "grey"),
  border.lwds = c(1, 0.5),
  # Labels: one entry per index level (Continent, Country)
  fontsize.labels = c(0.7, 0.4),
  fontcolor.labels = c("white", "white"),
  fontface.labels = 1,
  bg.labels = c("transparent"),
  align.labels = list(c("center", "center"), c("left", "top")),
  overlap.labels = 0.5,
  inflate.labels = TRUE
)
Treemap di atas dibuat berdasarkan tingginya
gdpPercap (USD) di setiap negara. Ukuran kotak (bukan gelap-terangnya
warna) menunjukkan besarnya gdpPercap (USD) suatu negara. Benua
Europe merupakan benua dengan angka
gdpPercap (USD) yang tinggi, yaitu pada negara Norway. Selanjutnya
benua Asia, yaitu pada negara Kuwait serta Singapore.