# ggplot2 notes

# Global knitr chunk options: show the code, hide warnings and messages.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

Packages

library(ggplot2)
library(magrittr)
library(dplyr)
library(tidyr)
library(titanic)
library(faraway)
library(GGally)
library(gridExtra)
library(ggthemes)
library(viridis)
library(dlnm)
library(RColorBrewer)
library(forcats)
# Load the example data sets used throughout these notes.
data("worldcup")
data("titanic_train", package = "titanic")

# Shorter alias for the Titanic training data.
titanic <- titanic_train

Generic code

# Template only: `dataframe`, `column_1` and `column_2` are placeholders to be
# replaced with a real data frame and two of its columns.
object <- ggplot(
  dataframe,
  aes(x = column_1, y = column_2)
)

or, if you don’t need to save the object: `ggplot(dataframe, aes(x = column_1, y = column_2))`

Basic histogram

# Variant 1: data and aesthetics are both supplied to ggplot().
ggplot(titanic, mapping = aes(x = Fare)) +
  geom_histogram()

or

# Variant 2: data in ggplot(), aesthetics in the geom.
ggplot(titanic) +
  geom_histogram(mapping = aes(x = Fare))

or

# Variant 3: empty ggplot(); data and aesthetics are both given to the geom.
ggplot() +
  geom_histogram(mapping = aes(x = Fare), data = titanic)

or

# Variant 4: pipe the data into ggplot(); aesthetics in the geom.
titanic %>%
  ggplot() +
  geom_histogram(mapping = aes(x = Fare))

or

# Variant 5: pipe the data into ggplot(); aesthetics in ggplot().
titanic %>%
  ggplot(mapping = aes(x = Fare)) +
  geom_histogram()

Change the number of bins

# Histogram of fares drawn with 15 bins.
titanic %>%
  ggplot(aes(x = Fare)) +
  geom_histogram(bins = 15)

basic scatterplot

# Scatterplot of passes against time played, one point per row of worldcup.
worldcup %>%
  ggplot(aes(x = Time, y = Passes)) +
  geom_point()

size and color aesthetics

# Same scatterplot with two extra mapped aesthetics: point color follows
# Position and point size follows Shots.
ggplot(
  worldcup,
  aes(x = Time, y = Passes, color = Position, size = Shots)
) +
  geom_point()

Aesthetic args

| Code | Description |
|------|-------------|
| x | Position on x-axis |
| y | Position on y-axis |
| shape | Shape |
| color | Color of border of elements |
| fill | Color of inside of elements |
| size | Size |
| alpha | Transparency (1: opaque; 0: transparent) |
| linetype | Type of line (e.g., solid, dashed) |

Geoms

| Function | Common aesthetics | Common arguments |
|----------|-------------------|------------------|
| geom_point() | x, y | |
| geom_line() | x, y | arrow, na.rm |
| geom_segment() | x, y, xend, yend | arrow, na.rm |
| geom_path() | x, y | na.rm |
| geom_polygon() | x, y | |
| geom_histogram() | x | bins, binwidth |
| geom_abline() | intercept, slope | |
| geom_hline() | yintercept | |
| geom_vline() | xintercept | |
| geom_smooth() | x, y | method, se, span |
| geom_text() | x, y, label | parse, nudge_x, nudge_y |

Multiple geoms

# Rows with the maximum number of shots or of passes, each given a
# "Team, Position" label.
noteworthy_players <- worldcup %>%
  filter(Shots == max(Shots) | Passes == max(Passes)) %>%
  mutate(point_label = paste(Team, Position, sep = ", "))

# All players as points; text labels are drawn only for the noteworthy rows
# because geom_text() is given its own data.
ggplot(worldcup, aes(x = Passes, y = Shots)) +
  geom_point() +
  geom_text(
    data = noteworthy_players,
    mapping = aes(label = point_label),
    vjust = "inward",
    hjust = "inward"
  )

Vertical lines every 90 minutes

# Histogram of time played (10-minute bins) with a semi-transparent blue
# reference line at every multiple of 90 from 0 to 540.
worldcup %>%
  ggplot(aes(x = Time)) +
  geom_histogram(binwidth = 10) +
  geom_vline(
    xintercept = seq(0, 540, by = 90),
    color = "blue",
    alpha = 0.5
  )

Constant aesthetics

# A constant aesthetic is set outside aes(): every point is dark green.
worldcup %>%
  ggplot(aes(x = Time, y = Passes)) +
  geom_point(color = "darkgreen")

Other useful plot additions

| Element | Description |
|---------|-------------|
| ggtitle | Plot title |
| xlab, ylab | x- and y-axis labels |
| xlim, ylim | Limits of x- and y-axis |

Example plots

data(nepali)

# Keep five columns, turn id and sex into factors (sex codes 1/2 become
# "Male"/"Female"), and keep only the first row for each id.
nepali <- nepali %>%
  select(id, sex, wt, ht, age) %>%
  mutate(
    id = factor(id),
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female"))
  ) %>%
  distinct(id, .keep_all = TRUE)

Histograms

# Default histogram of height.
nepali %>%
  ggplot(aes(x = ht)) +
  geom_histogram()

# Styled histogram: light-blue bars with black borders, a title, an x-axis
# label, and the x-axis limited to 0-120.
nepali %>%
  ggplot(aes(x = ht)) +
  geom_histogram(fill = "lightblue", color = "black") +
  labs(title = "Height of children", x = "Height (cm)") +
  xlim(0, 120)

Scatterplots

# Plain scatterplot of weight against height.
nepali %>%
  ggplot(aes(x = ht, y = wt)) +
  geom_point()

# Small blue points with a title and axis labels.
nepali %>%
  ggplot(aes(x = ht, y = wt)) +
  geom_point(color = "blue", size = 0.5) +
  labs(
    title = "Weight versus Height",
    x = "Height (cm)",
    y = "Weight (kg)"
  )

# Same plot, with point color mapped to sex instead of a constant color.
nepali %>%
  ggplot(aes(x = ht, y = wt, color = sex)) +
  geom_point(size = 0.5) +
  labs(
    title = "Weight versus Height",
    x = "Height (cm)",
    y = "Weight (kg)"
  )

Boxplots

# Single boxplot of height; x is the constant 1, so there is one box.
nepali %>%
  ggplot(aes(x = 1, y = ht)) +
  geom_boxplot() +
  labs(x = "", y = "Height (cm)")

# One box per level of sex.
nepali %>%
  ggplot(aes(x = sex, y = ht)) +
  geom_boxplot() +
  labs(x = "Sex", y = "Height (cm)")

Common Extensions

Pairs

# Pairs plot of four nepali columns.
nepali %>%
  select(sex, wt, ht, age) %>%
  ggpairs()

More here: https://www.ggplot2-exts.org/

Customizing ggplot2 Plots

# (Re)load the packaged data sets into the workspace.
data(nepali)
data(worldcup)
data(chicagoNMMAPS)

# Shorter alias, then the subset of rows for July 1995.
chic <- chicagoNMMAPS
chic_july <- chic %>%
  filter(month == 7, year == 1995)

# Shots against time played, drawn with the Tufte theme.
worldcup %>%
  ggplot(aes(x = Time, y = Shots)) +
  geom_point() +
  theme_tufte()

# Base plot (no geom yet): axis labels and a fixed y-range of 0-450. It is
# reused below with different geoms and themes.
chicago_plot <- chic_july %>%
  ggplot(aes(x = date, y = death)) +
  labs(x = "Day in July 1995", y = "All-cause deaths") +
  ylim(0, 450)

# Black filled area with the Excel theme.
chicago_plot +
  geom_area(fill = "black") +
  theme_excel()

# Line with the Tufte theme.
chicago_plot +
  geom_line() +
  theme_tufte()

Clean labels

# Build example data with terse labels: the column is renamed to Pos and its
# levels are recoded to two-letter abbreviations.
wc_example_data <- worldcup %>%
  dplyr::rename(Pos = Position) %>%
  mutate(
    Pos = fct_recode(
      Pos,
      "DC" = "Defender",
      "FW" = "Forward",
      "GK" = "Goalkeeper",
      "MF" = "Midfielder"
    )
  )

# Recode the abbreviations back to full names before plotting, then draw a
# horizontal bar chart of the number of players per position.
wc_example_data %>%
  mutate(
    Pos = fct_recode(
      Pos,
      "Defender" = "DC",
      "Forward" = "FW",
      "Goalkeeper" = "GK",
      "Midfielder" = "MF"
    )
  ) %>%
  ggplot(aes(x = Pos)) +
  geom_bar(fill = "lightgray") +
  labs(x = "", y = "Number of players") +
  coord_flip() +
  theme_tufte()

Smoothed lowess curve

# Forwards only: shots against passes with a smoother (geom_smooth() defaults)
# drawn over the points.
worldcup %>%
  filter(Position == "Forward") %>%
  ggplot(aes(x = Passes, y = Shots)) +
  geom_point(size = 1.5) +
  geom_smooth() +
  theme_few()

Highlight a couple players

# Rows with the maximum number of shots or of passes, each given a
# "Team, Position" label.
# paste() is required here: paste0() has no `sep` argument, so `sep = ", "`
# would be concatenated as one more string and produce labels of the form
# "<Team><Position>, " instead of "<Team>, <Position>".
noteworthy_players <- worldcup %>%
  filter(Shots == max(Shots) | Passes == max(Passes)) %>%
  mutate(point_label = paste(Team, Position, sep = ", "))

# All players as semi-transparent points; blue text labels are drawn only for
# the noteworthy rows because geom_text() is given its own data.
ggplot(worldcup, aes(x = Passes, y = Shots)) +
  geom_point(alpha = 0.5) +
  geom_text(
    data = noteworthy_players,
    mapping = aes(label = point_label),
    vjust = "inward",
    hjust = "inward",
    color = "blue"
  ) +
  theme_few()

Small multiples

# Single panel: positions are distinguished by point color.
ggplot(worldcup, aes(x = Time, y = Shots, color = Position)) +
  geom_point()

Facets

# One panel per Position, laid out as columns.
ggplot(worldcup, aes(x = Time, y = Shots)) +
  geom_point() +
  facet_grid(. ~ Position)

Two team facet relationships

# Two teams only: Team defines the facet rows, Position the facet columns.
ggplot(
  filter(worldcup, Team %in% c("Spain", "Netherlands")),
  aes(x = Time, y = Shots)
) +
  geom_point() +
  facet_grid(Team ~ Position)

Facet Wrap

# One panel per Team, wrapped into 6 columns.
ggplot(worldcup, aes(x = Time, y = Shots)) +
  geom_point(alpha = 0.25) +
  facet_wrap(~ Team, ncol = 6)

Reorder factors

# Turn the 1/2 sex codes into a factor with levels "Male", "Female".
nepali <- nepali %>%
  mutate(
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female"))
  )

# Facet columns appear in factor-level order.
nepali %>%
  ggplot(aes(x = ht, y = wt)) +
  geom_point() +
  facet_grid(. ~ sex)

# Re-level the factor so that "Female" comes first.
nepali <- nepali %>%
  mutate(
    sex = factor(sex, levels = c("Female", "Male"))
  )

# Same plot again, now with the facets in the new level order.
nepali %>%
  ggplot(aes(x = ht, y = wt)) +
  geom_point() +
  facet_grid(. ~ sex)

Meaningful ordering

# Mean time per team, with teams ordered on the y-axis by that mean.
worldcup %>%
  group_by(Team) %>%
  summarize(mean_time = mean(Time)) %>%
  # Sort the rows, then fix the factor levels to the sorted row order so the
  # axis follows mean_time rather than the original level order.
  arrange(mean_time) %>%
  mutate(Team = factor(Team, levels = Team)) %>%
  ggplot(aes(x = mean_time, y = Team)) +
  geom_point() +
  labs(x = "Mean time per player (minutes)", y = "") +
  theme_few()

# Shots against time, faceted by Position, with the top shooter(s) of each
# position highlighted in red. Facets are ordered by the position's mean shots.
worldcup %>%
  select(Position, Time, Shots) %>%
  group_by(Position) %>%
  mutate(
    ave_shots = mean(Shots),           # per-position mean
    most_shots = Shots == max(Shots)   # TRUE for the per-position maximum
  ) %>%
  ungroup() %>%
  # Sort by mean shots, then set the factor levels to that order of first
  # appearance so the facets follow it.
  arrange(ave_shots) %>%
  mutate(Position = factor(Position, levels = unique(Position))) %>%
  ggplot(aes(x = Time, y = Shots, color = most_shots)) +
  geom_point(alpha = 0.5) +
  scale_color_manual(
    values = c("TRUE" = "red", "FALSE" = "black"),
    # "none" hides the legend; `guide = FALSE` is deprecated in ggplot2.
    guide = "none"
  ) +
  facet_grid(. ~ Position) +
  theme_few()

# Per-team playing time: a gray segment from the team's minimum to maximum,
# one point per player, and a larger red point at the team mean. Teams are
# ordered on the y-axis by mean time.
worldcup %>%
  dplyr::select(Team, Time) %>%
  dplyr::group_by(Team) %>%
  dplyr::mutate(
    ave_time = mean(Time),
    min_time = min(Time),
    max_time = max(Time)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(ave_time) %>%
  # Levels in order of first appearance after sorting.
  dplyr::mutate(Team = factor(Team, levels = unique(Team))) %>%
  ggplot(aes(x = Time, y = Team)) +
  geom_segment(
    aes(x = min_time, xend = max_time, yend = Team),
    alpha = 0.5,
    color = "gray"
  ) +
  geom_point(alpha = 0.5) +
  geom_point(aes(x = ave_time), size = 2, color = "red", alpha = 0.5) +
  labs(y = "") +
  theme_minimal()

Scales and Color

# Baseline plot with default scales: color by Position, size by Shots.
ggplot(
  worldcup,
  aes(x = Time, y = Passes, color = Position, size = Shots)
) +
  geom_point(alpha = 0.5)

Continuous Axis

# Customized x-axis: a new axis name, major breaks at 180/360/540 and minor
# breaks at 90/270/450.
ggplot(
  worldcup,
  aes(x = Time, y = Passes, color = Position, size = Shots)
) +
  geom_point(alpha = 0.5) +
  scale_x_continuous(
    name = "Time played (minutes)",
    breaks = c(180, 360, 540),
    minor_breaks = c(90, 270, 450)
  )

Continuous Scale

# Customized x-axis plus a customized size scale (legend title and breaks).
ggplot(
  worldcup,
  aes(x = Time, y = Passes, color = Position, size = Shots)
) +
  geom_point(alpha = 0.5) +
  scale_x_continuous(
    name = "Time played (minutes)",
    breaks = c(180, 360, 540),
    minor_breaks = c(90, 270, 450)
  ) +
  scale_size_continuous(
    name = "Shots on goal",
    breaks = c(0, 10, 20)
  )

log scale on y-axis

# Deaths over time on a log10 y-axis, with breaks at 100, 200, 300 and 400.
chic_july %>%
  ggplot(aes(x = date, y = death)) +
  geom_line() +
  scale_y_log10(breaks = c(100, 200, 300, 400))

Color palettes: http://colorbrewer2.org/

# Preview three RColorBrewer palettes with 8 colors each.
display.brewer.pal(n = 8, name = "Set1")

display.brewer.pal(n = 8, name = "PRGn")

display.brewer.pal(n = 8, name = "PuBuGn")

# Shared base plot; each variant below only swaps the Brewer color palette.
wc_example <- worldcup %>%
  ggplot(aes(x = Time, y = Passes, color = Position, size = Shots)) +
  geom_point(alpha = 0.5)

# NOTE: the variable `c` shadows the name of base::c; calls such as c(1, 2)
# still resolve to the function.
a <- wc_example +
  labs(title = "Set1") +
  scale_color_brewer(palette = "Set1")
b <- wc_example +
  labs(title = "Dark2") +
  scale_color_brewer(palette = "Dark2")
c <- wc_example +
  labs(title = "Pastel2") +
  scale_color_brewer(palette = "Pastel2") +
  theme_dark()
d <- wc_example +
  labs(title = "Accent") +
  scale_color_brewer(palette = "Accent")

# Arrange the four variants in a 2-column grid.
grid.arrange(a, b, c, d, ncol = 2)

# Shared base plot with a continuous color mapping (Passes).
worldcup_ex <- ggplot(worldcup, aes(x = Time, y = Shots, color = Passes)) +
  geom_point(size = 0.9)

# One plot per viridis option ("A" to "D"), each titled with the palette name.
magma_plot <- worldcup_ex +
  labs(title = "magma") +
  scale_color_viridis(option = "A")
inferno_plot <- worldcup_ex +
  labs(title = "inferno") +
  scale_color_viridis(option = "B")
plasma_plot <- worldcup_ex +
  labs(title = "plasma") +
  scale_color_viridis(option = "C")
viridis_plot <- worldcup_ex +
  labs(title = "viridis") +
  scale_color_viridis(option = "D")

# Arrange the four variants in a 2-column grid.
grid.arrange(
  magma_plot, inferno_plot, plasma_plot, viridis_plot,
  ncol = 2
)