Working Code

library(dplyr)
library(ggplot2)

# Load the built-in mtcars data and preview its columns
data(mtcars)
mtcars %>% glimpse()

# Scatter plot of miles per gallon (mpg) against weight (wt); cyl is wrapped in
# factor() so every cylinder count gets its own discrete colour
ggplot(data = mtcars) +
    geom_point(mapping = aes(x = wt, y = mpg, colour = factor(cyl)))

# Average fuel efficiency of the 4 cylinder cars, one row per transmission
# type (am). Nested calls read inside-out: filter, then group, then summarise.
summarise(
    group_by(filter(mtcars, cyl == 4), am),
    mean_mpg = mean(mpg)
)
    
# Plot of manual cars (am == 1): mpg against weight with a fitted straight
# line, drawn in a separate panel for each number of cylinders
mtcars %>% 
    filter(am == 1) %>%
    ggplot(aes(x = wt, y = mpg)) + 
        geom_point() + 
        # The formula is spelled out so geom_smooth() does not print its
        # "using formula" message; se = FALSE leaves out the confidence band
        geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + 
        facet_wrap(~ cyl)

# Heat map of number of cars in each cylinder/transmission combo.
# Counts the rows for every (cyl, am) pair into `combos`, then draws one tile
# per pair, with the fill running from darkorchid (fewest) through grey90 to
# forestgreen (most).
mtcars %>% 
    group_by(cyl, am) %>% 
    # .groups = "drop" returns an ungrouped tibble; without it the result stays
    # grouped by cyl and summarise() prints a message about that grouping
    summarise(combos = n(), .groups = "drop") %>% 
    ggplot(aes(x = cyl, y = am, fill = combos)) + 
        geom_tile() +
        scale_fill_gradientn(colours = c("darkorchid", "grey90", "forestgreen"))


# A new dataset: mpg
# First: some cleaning

# How substr() works: it keeps the characters from `start` to `stop`
substr("abcde", 2, 5) # second to fifth letters
substr(x = "abcde", start = 1, stop = 1) # first letter only

# All of the cleaning in a single mutate():
#   trans2   - first letter of trans, stored as a factor. This is a letter
#              code, not 0/1 (presumably "a" = automatic, "m" = manual;
#              confirm against the data)
#   fuel_eff - weighted sum of city and highway fuel efficiency
#              (55% cty + 45% hwy), the "transport standards" figure
#   cyl      - turned into a factor so that R doesn't try to do math on it
mpg <- mpg %>%
    mutate(
        trans2 = factor(substr(trans, 1, 1)),
        fuel_eff = 0.55 * cty + 0.45 * hwy,
        cyl = factor(cyl)
    )

glimpse(mpg)

Same Plots, Different Data

# Exercise template: every ??? below is a placeholder that must be replaced
# with a column name or value from the cleaned mpg data before the code runs.

# Miles per gallon versus weight, coloured by cylinders:
# NOTE(review): this call still plots mtcars, not the mpg data used by the rest
# of this section - confirm whether it should be rewritten for mpg.
ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) + geom_point()

# Average fuel efficiency of 4 cylinder cars, by transmission type
# (filter on the cylinder column, group by the transmission column, then name
# and compute the mean of the fuel-efficiency column)
mpg %>% 
    filter(??? == 4) %>% 
    group_by(???) %>% 
    summarise(??? = mean(???))

# Plot of manual cars, different plots for different numbers of cylinders
# (the cleaning step stored the first letter of trans in trans2, so the filter
# value here is a letter code rather than the 0/1 used by mtcars$am)
mpg %>% 
    filter(??? == ???) %>%
    ggplot(aes(x = ???, y = ???)) + 
    geom_point() + 
    geom_smooth(method = "lm", se = FALSE) + 
    facet_wrap(~ ???)

# Heat map of number of cars in each cylinder/transmission combo
# (fill = combos below refers to the count column, so the placeholder name in
# summarise() has to be combos for the plot to find it)
mpg %>% 
    group_by(???, ???) %>% 
    summarise(??? = n()) %>% 
    ggplot(aes(x = ???, y = ???, fill = combos)) + 
    geom_tile() +   
    scale_fill_gradientn(colours = c("darkorchid", "grey90", "forestgreen"))

What do you notice about the two sets of plots? Would you draw the same conclusions about the fuel efficiency of cars? Note: the mpg data set includes every variation of the Audi A4, so it was likely not a random sample.

Bonus

Re-create the following stacked boxplot for the mpg dataset. Hint: you’ll need fill = ??? and geom_boxplot().

Example of the greatness of ggplot2

Copy this code into a script file and add a comment on every line to explain what it’s doing.

The beauty of ggplot2: this is possible! (Other plotting methods have very difficult code.)

library(ggplot2)
# Price against carat for the diamonds data, one colour per cut
ggplot(diamonds, aes(carat, price, colour = cut)) +
    # the raw observations
    geom_point() +
    # one fitted curve per colour: a linear model on a degree-2 polynomial
    # of x, drawn without a confidence band
    geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) +
    # a separate panel for each clarity level
    facet_wrap(~ clarity) +
    # square-root y axis; major breaks every 5000, minor breaks every 1000
    scale_y_sqrt(
        breaks = seq(0, 30000, by = 5000),
        minor_breaks = seq(0, 30000, by = 1000)
    ) +
    # axis, legend, title and caption text
    labs(
        title = "Quadratic (linear) models for different cuts and carats on the square-root scale.",
        caption = "Source: ggplot2 data",
        x = "Carat",
        y = "Price",
        colour = "Cut Quality"
    ) +
    # light base theme first, then move the legend under the panels
    theme_light() +
    theme(legend.position = "bottom")

Continuing your learning