1. Basics

Key function: geom_boxplot()

Key arguments to customize the plot:

width: the width of the box plot

notch: If TRUE, creates a notched box plot.

       The notch displays a confidence interval around the median which is normally based on the median +/- 1.58*IQR/sqrt(n). 
       
       Notches are used to compare groups; if the notches of two boxes do not overlap, this is strong evidence that the medians differ.

color, size, linetype: Border line color, size and type

fill: fill color of the box plot areas

outlier.colour, outlier.shape, outlier.size: The color, the shape and the size for outlying points.

library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Fuel economy (mpg) by number of cylinders; cyl is converted to a factor so
# that each distinct value gets its own box.
mtcars %>%
  ggplot(aes(as.factor(cyl), mpg)) +
  geom_boxplot(fill = "slateblue", alpha = 0.2) +
  labs(x = "cyl")

# Preview the first six rows of the mpg dataset.
head(mpg, n = 6)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa~
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa~
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa~
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa~
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa~
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa~
# Check the types of the two columns used in the plots below.
class(mpg[["class"]])
## [1] "character"
class(mpg[["hwy"]])
## [1] "integer"
# geom_boxplot() offers several arguments to customize the appearance.
# notchwidth is not set here: it has no effect unless notch = TRUE.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(
    # box appearance
    color = "orange",
    fill = "orange",
    alpha = 0.2,
    # outlier appearance; outlier.fill only applies to the fillable point
    # shapes (21-25), so one of them is selected explicitly
    outlier.shape = 21,
    outlier.colour = "blue",
    outlier.fill = "blue",
    outlier.size = 3
  )

# Same customized plot as a notched box plot.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(
    # box appearance
    color = "orange",
    fill = "orange",
    alpha = 0.2,
    # draw notches around the medians; notchwidth is the width of the notch
    # relative to the box body
    notch = TRUE,
    notchwidth = 0.8,
    # outlier appearance; outlier.fill only applies to the fillable point
    # shapes (21-25), so one of them is selected explicitly
    outlier.shape = 21,
    outlier.colour = "blue",
    outlier.fill = "blue",
    outlier.size = 3
  )
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

Controlling colors

Set a unique color with fill, colour, and alpha

# One fixed outline colour, fill colour and transparency for every box.
mpg %>%
  ggplot(aes(class, hwy)) +
  geom_boxplot(colour = "red", fill = "purple", alpha = 0.3)

Set a different color for each group

# Mapping fill to class inside aes() gives each group its own colour.
mpg %>%
  ggplot(aes(class, hwy, fill = class)) +
  geom_boxplot(alpha = 0.3)

# Per-class fill colours taken from the ColorBrewer "Dark2" palette.
mpg %>%
  ggplot(aes(class, hwy, fill = class)) +
  geom_boxplot(alpha = 0.3) +
  scale_fill_brewer(palette = "Dark2")

An exploratory data analysis (EDA) of the gapminder data can be found at the following link: https://trungmdang.wixsite.com/website/post/revisiting-the-gapminder-dataset-using-dplyr-and-ggplot2

library(gapminder)
# Life expectancy by continent, one fill colour per continent.
ggplot(gapminder, aes(continent, lifeExp, fill = continent)) +
  geom_boxplot()

Highlighting a group of interest

library(dplyr)
# Preview the helper column: "Highlighted" for subcompact cars and "Normal"
# for every other class.
mpg %>%
  select(class, hwy) %>%
  mutate(type = ifelse(class == "subcompact", "Highlighted", "Normal")) %>%
  head(n = 10)
## # A tibble: 10 x 3
##    class     hwy type  
##    <chr>   <int> <chr> 
##  1 compact    29 Normal
##  2 compact    29 Normal
##  3 compact    31 Normal
##  4 compact    30 Normal
##  5 compact    26 Normal
##  6 compact    26 Normal
##  7 compact    27 Normal
##  8 compact    26 Normal
##  9 compact    25 Normal
## 10 compact    28 Normal
# Highlight the subcompact class: its box is filled blue, all others grey.
mpg %>%
  mutate(type = ifelse(class == "subcompact", "Highlighted", "Normal")) %>%
  ggplot(aes(x = class, y = hwy, fill = type)) +
  geom_boxplot() +
  # Named values bind each colour to its label explicitly instead of relying
  # on the alphabetical order of the labels.
  scale_fill_manual(values = c(Highlighted = "blue", Normal = "grey")) +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none"
  )

Grouped boxplot

library(gapminder)
# Grouped box plot: life expectancy per continent, one box per selected year.
gapminder %>%
  filter(year %in% c(1957, 1987, 2007), continent != "Oceania") %>%
  ggplot(aes(
    x = continent,
    y = lifeExp,
    fill = factor(year) # year is an integer; convert it to a factor to group
  )) +
  geom_boxplot() +
  # The fill colour is the only encoding of the year, so the legend is kept;
  # give it a readable title instead of the default "factor(year)".
  labs(fill = "year") +
  theme(axis.title.x = element_blank())

Another way to make a grouped boxplot is to use faceting in ggplot2.

# One panel per year; within each panel, one box per continent.
gapminder %>%
  filter(continent != "Oceania") %>%
  ggplot(aes(continent, lifeExp, fill = continent)) +
  geom_boxplot() +
  facet_wrap(vars(year)) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# One panel per continent; within each panel, one box per selected year.
gapminder %>%
  filter(
    year %in% c(1952, 1962, 1972, 1982, 1992, 2002),
    continent != "Oceania"
  ) %>%
  ggplot(aes(factor(year), lifeExp, fill = continent)) +
  geom_boxplot() +
  facet_wrap(vars(continent), ncol = 4) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )