This lab teaches core ggplot2 charts (scatter, bar, line, histogram, box/violin, faceting) and gives quick recipes for “fancy” visuals (ridgeline, raincloud-like plots, beeswarm, interactive and animated variants). Example code is short, copy‑able, and reproducible.
References used in this training (for practice & deeper dives):
We’ll use ggplot2 plus a few helper packages. The chunk below installs anything missing and sets knitr defaults.
# Install any of `pkgs` that are not already available in the library paths.
#
# pkgs: character vector of package names (duplicates are ignored).
# Returns NULL invisibly; called for its side effect of installing packages.
install_if_missing <- function(pkgs) {
  pkgs <- unique(pkgs)
  # system.file() returns "" when a package cannot be found. Checking each
  # name this way avoids installed.packages(), which reads metadata for every
  # installed package and is slow on large libraries.
  is_installed <- vapply(
    pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  to_install <- pkgs[!is_installed]
  if (length(to_install) > 0) {
    install.packages(to_install, dependencies = TRUE)
  }
  invisible(NULL)
}
# Packages needed for the core charts.
core_pkgs <- c(
  "ggplot2", "dplyr", "tibble",
  "tidyr", "readr", "forcats"
)

# Extra packages used by the "fancy" recipes.
fancy_pkgs <- c(
  "ggridges", "ggbeeswarm", "ggdist", "viridisLite",
  "plotly", "gifski", "gganimate"
)

# Install whichever of the packages above are not yet installed.
install_if_missing(c(core_pkgs, fancy_pkgs))
library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)
library(forcats)
library(viridisLite) # Palettes
Datasets we’ll use:
- mtcars (built‑in): numeric + categorical conversions.
- iris (built‑in): classic 3‑class dataset.
- diamonds (from ggplot2): large categorical + numeric mix (subset to keep plots fast).
# Show the first rows of each dataset used in the lab.
head(mtcars)
head(iris)
# diamonds is large, so preview the head of a random 2000-row sample.
head(dplyr::sample_n(ggplot2::diamonds, 2000))
A plot = `ggplot(data, aes(...)) + geom_*() + scale_*() + facet_*() + theme()`.
Add layers with `+`. `aes()` maps data → visuals (x, y, color, fill, size, shape).
See: ggplot2 official intro and reference.
When: two numeric variables; optionally color/shape by a factor.
Refs: R Graph Gallery scatter overview and basic example #272.
# Keep the car names as a `model` column and treat cylinder count as a factor
# so it maps to a discrete colour scale.
df <- rownames_to_column(mtcars, var = "model")
df <- mutate(df, cyl = factor(cyl))

# Scatter of mpg against weight with one linear fit per cylinder group.
ggplot(data = df, mapping = aes(x = wt, y = mpg, color = cyl)) +
  geom_point(alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "MPG vs Weight",
    x = "Weight (1000 lbs)",
    y = "MPG",
    color = "Cylinders"
  )
Quick tweaks: transparency (`alpha`), shape/size, `geom_jitter()` to reduce overlap.
# geom_bar() (stat_count) vs. geom_col() (stat_identity)

# Adult Titanic passengers: total Freq for each Sex x Survived combination.
cats <- as.data.frame(Titanic) |>
  dplyr::filter(Age == "Adult") |>
  dplyr::group_by(Sex, Survived) |>
  dplyr::summarise(n = sum(Freq), .groups = "drop")

# values present -> geom_col
ggplot(cats, aes(x = Sex, y = n, fill = Survived)) +
  # Side-by-side bars, one per Survived level within each Sex.
  geom_col(position = "dodge") +
  labs(title = "Adult passengers by Sex and Survival", x = NULL, y = "Count") +
  guides(fill = guide_legend(reverse = TRUE))
# Same counts, with every bar rescaled to full height to show composition.
ggplot(data = cats, mapping = aes(x = Sex, y = n, fill = Survived)) +
  geom_col(position = "fill") +
  labs(
    title = "Composition (Percent Stacked)",
    x = NULL,
    y = "Percent"
  ) +
  # Format the 0-1 proportions on the y axis as percentages.
  scale_y_continuous(labels = scales::percent)
# Toy daily series: cumulative sum of 30 normal draws (mean 2, sd 4).
set.seed(1)
ts <- tibble(
  day = 1:30,
  sales = cumsum(rnorm(30, mean = 2, sd = 4))
)

# Line chart with a point marker at every observation.
ggplot(data = ts, mapping = aes(x = day, y = sales)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Toy time series",
    x = "Day",
    y = "Sales"
  )
# Histogram of mpg with a kernel density curve overlaid on the same scale.
ggplot(mtcars, aes(x = mpg)) +
  # Map bar heights to density instead of the default count so the bars and
  # the density curve share the y axis that is labelled "Density".
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "grey80") +
  geom_density(aes(y = after_stat(density)), linewidth = 1) +
  labs(title = "Histogram + Density", x = "MPG", y = "Density")
# Order the species by their median sepal length so the groups appear sorted.
iris2 <- mutate(
  as_tibble(iris),
  Species = forcats::fct_reorder(Species, Sepal.Length, .fun = median)
)

# Violin for the distribution shape with a narrow boxplot drawn on top.
ggplot(
  data = iris2,
  mapping = aes(x = Species, y = Sepal.Length, fill = Species)
) +
  geom_violin(trim = FALSE, alpha = 0.6) +
  # Outlier points are hidden on the boxplot layer.
  geom_boxplot(width = 0.15, outlier.shape = NA) +
  labs(
    title = "Sepal length by species",
    x = NULL,
    y = "Sepal length"
  ) +
  theme(legend.position = "none")
# Fix the RNG seed so the random subset (and therefore the plot) is the same
# on every run, like the other randomised chunks in this lab.
set.seed(123)

# Random 2000-row subset of diamonds; fct_lump_n() keeps the 4 most frequent
# cuts and lumps the rest together.
d_small <- ggplot2::diamonds |>
  dplyr::sample_n(2000) |>
  dplyr::mutate(cut = forcats::fct_lump_n(cut, 4))

# One panel per cut; the legend is dropped because the panel titles already
# identify the cut.
ggplot(d_small, aes(carat, price, color = cut)) +
  geom_point(alpha = 0.6) +
  facet_wrap(~ cut) +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "Price vs Carat by cut (sample of diamonds)") +
  theme(legend.position = "none")
- `labs(title, subtitle, x, y, caption)`
- `scale_x/y_*`, color/fill scales (`scale_color_viridis_c/d()`)
- `theme_minimal()`, `theme_bw()`, `theme(legend.position="bottom")`
# Scatter of mpg against weight, coloured by the continuous disp variable
# with a viridis palette, minimal theme and the legend below the panel.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point() +
  scale_color_viridis_c(option = "C") +
  labs(
    title = "Polished scatterplot",
    subtitle = "Using viridis and minimal theming",
    x = "Weight (1000 lbs)",
    y = "MPG",
    color = "Displacement"
  ) +
  # theme() must follow theme_minimal() so the legend position is not reset.
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

# Print the stored plot.
p
These are “nice to have” patterns you can add when appropriate.
Great to compare many distributions compactly. Requires ggridges.
library(ggridges)

# Fix the RNG seed so the random subset is the same on every run.
set.seed(123)

# Random 3000-row subset of diamonds to keep the density estimates fast.
df <- ggplot2::diamonds |>
  dplyr::sample_n(3000) |>
  dplyr::mutate(cut = forcats::fct_lump_n(cut, 5))

# One price density ridge per cut, filled by the computed x value.
# after_stat() replaces stat(), which is deprecated since ggplot2 3.4.0.
ggplot(df, aes(x = price, y = cut, fill = after_stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "price", option = "C") +
  labs(title = "Ridgeline — price distribution by cut", x = "Price", y = NULL) +
  theme_ridges() +
  theme(legend.position = "bottom")
Gives distribution shape, summary, and raw points together.
library(ggdist) # provides stat_halfeye() and stat_dots()

# Three simulated groups of 100 normal draws each, with different means/sds.
set.seed(42)
toy <- tibble(
  grp = rep(c("A", "B", "C"), each = 100),
  val = c(
    rnorm(100, mean = 5, sd = 1),
    rnorm(100, mean = 6, sd = 1.2),
    rnorm(100, mean = 6.5, sd = 1.1)
  )
)

# Half-density slab + narrow boxplot + dots on the opposite side, flipped so
# the groups run down the y axis.
ggplot(data = toy, mapping = aes(x = grp, y = val, fill = grp)) +
  ggdist::stat_halfeye(
    adjust = 0.5,
    width = 0.6,
    .width = 0,
    justification = -0.2,
    point_colour = NA
  ) +
  geom_boxplot(width = 0.15, outlier.shape = NA) +
  ggdist::stat_dots(side = "left", justification = 1.1, binwidth = 0.05) +
  coord_flip() +
  labs(title = "Raincloud‑style distribution") +
  theme_minimal() +
  theme(legend.position = "none")
library(ggbeeswarm)

# Every observation is drawn as a point, spread sideways within its species.
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Length, color = Species)
) +
  geom_beeswarm() +
  labs(title = "Beeswarm: show all points with less overlap") +
  theme(legend.position = "none")
library(plotly)

# Build a static ggplot first, then convert it to an interactive widget.
p <- ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = factor(cyl))
) +
  geom_point()

ggplotly(p)
Requires gganimate + gifski. Keep datasets small to knit smoothly.
library(gganimate)

# 30 time steps with 30 random points each (900 rows in total).
set.seed(7)
df <- tibble(
  t = rep(1:30, each = 30),
  x = rnorm(900),
  y = rnorm(900)
)

# Animate over `t`; the title shows the time of the current frame.
anim <- ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point(alpha = 0.6) +
  transition_time(t) +
  labs(title = "t = {frame_time}")

# Render the animation and write it to a GIF file.
anim_save(
  filename = "animation.gif",
  animation = animate(
    anim,
    nframes = 60,
    fps = 15,
    width = 600,
    height = 450
  )
)
ggplot
).