Key function: geom_boxplot()
Key arguments to customize the plot:
width: the width of the box plot
notch: If TRUE, creates a notched box plot.
The notch displays a confidence interval around the median which is normally based on the median +/- 1.58*IQR/sqrt(n).
Notches are used to compare groups; if the notches of two boxes do not overlap, this is strong evidence that the medians differ.
color, size, linetype: Border line color, size and type
fill: box plot areas fill color
outlier.colour, outlier.shape, outlier.size: The color, the shape and the size for outlying points.
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Box plot of fuel economy (mpg) per cylinder count in mtcars.
# cyl is converted with as.factor() so it is used as a grouping variable
# on the x axis.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), y = mpg)) +
  geom_boxplot(fill = "slateblue", alpha = 0.2) +
  labs(x = "cyl")
# Preview the first rows of the mpg data set used in the examples below.
head(mpg)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
# Check the type of the grouping column (x axis): character.
class(mpg$class)
## [1] "character"
# Check the type of the measured column (y axis): integer.
class(mpg$hwy)
## [1] "integer"
# Customised box plot of highway mileage (hwy) by vehicle class, notches off.
# `notchwidth` is deliberately not passed: it only has an effect when
# `notch = TRUE`.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(
    # box appearance
    color = "orange",
    fill = "orange",
    alpha = 0.2,
    # outlier appearance
    outlier.colour = "blue",
    outlier.fill = "blue", # only shows with fillable point shapes (21-25)
    outlier.size = 3
  )
# Notched version of the customised box plot (hwy by class).
# ggplot2 prints "notch went outside hinges" for this data.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot(
    # notch settings
    notch = TRUE,
    notchwidth = 0.8, # notch width relative to the box body
    # box appearance
    color = "orange",
    fill = "orange",
    alpha = 0.2,
    # outlier appearance
    outlier.colour = "blue",
    outlier.fill = "blue",
    outlier.size = 3
  )
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
# Same colours for every box: red border, semi-transparent purple fill.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot(
    color = "red",
    fill = "purple",
    alpha = 0.3
  )
# Map fill to `class` so each vehicle class gets its own colour.
ggplot(data = mpg, mapping = aes(x = class, y = hwy, fill = class)) +
  geom_boxplot(alpha = 0.3)
# One fill colour per vehicle class, taken from the ColorBrewer "Dark2"
# palette instead of the default hues.
ggplot(data = mpg, mapping = aes(x = class, y = hwy, fill = class)) +
  geom_boxplot(alpha = 0.3) +
  scale_fill_brewer(palette = "Dark2")
The EDA of the gapminder data can be found at the following link: https://trungmdang.wixsite.com/website/post/revisiting-the-gapminder-dataset-using-dplyr-and-ggplot2
library(gapminder)
# Life expectancy by continent, one fill colour per continent.
ggplot(
  data = gapminder,
  mapping = aes(x = continent, y = lifeExp, fill = continent)
) +
  geom_boxplot()
library(dplyr)
# Preview the helper column that flags the group to highlight:
# "Highlighted" for subcompact cars, "Normal" for every other class.
mpg %>%
  select(class, hwy) %>%
  mutate(type = ifelse(class == "subcompact", "Highlighted", "Normal")) %>%
  head(10)
## # A tibble: 10 x 3
## class hwy type
## <chr> <int> <chr>
## 1 compact 29 Normal
## 2 compact 29 Normal
## 3 compact 31 Normal
## 4 compact 30 Normal
## 5 compact 26 Normal
## 6 compact 26 Normal
## 7 compact 27 Normal
## 8 compact 26 Normal
## 9 compact 25 Normal
## 10 compact 28 Normal
# Highlight one group: subcompact cars are drawn in blue, every other
# class in grey.
mpg %>%
  mutate(type = ifelse(class == "subcompact", "Highlighted", "Normal")) %>%
  ggplot(aes(x = class, y = hwy, fill = type)) +
  geom_boxplot() +
  # Named values tie each colour to its level explicitly instead of relying
  # on the alphabetical order of the levels.
  scale_fill_manual(values = c(Highlighted = "blue", Normal = "grey")) +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none"
  )
library(gapminder)
# Grouped box plot: life expectancy by continent, with one box per selected
# year inside each continent (Oceania excluded).
gapminder %>%
  filter(year %in% c(1957, 1987, 2007) & continent != "Oceania") %>%
  ggplot(aes(
    x = continent,
    y = lifeExp,
    fill = factor(year) # year is an integer; convert it to a factor to group
  )) +
  geom_boxplot() +
  # The legend is kept because the fill colour is the only cue that tells
  # the years apart.
  labs(fill = "Year") +
  theme(axis.title.x = element_blank())
Another way to make a grouped boxplot is to use facets in ggplot2.
# One panel per year: life expectancy by continent (Oceania excluded).
# The legend is hidden; the x axis labels already name the continents.
gapminder %>%
  filter(continent != "Oceania") %>%
  ggplot(mapping = aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot() +
  facet_wrap(~year) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1) # slanted tick labels
  )
# One panel per continent (Oceania excluded): life expectancy for six
# selected years, laid out in four columns.
gapminder %>%
  filter(
    year %in% c(1952, 1962, 1972, 1982, 1992, 2002),
    continent != "Oceania"
  ) %>%
  ggplot(mapping = aes(x = factor(year), y = lifeExp, fill = continent)) +
  geom_boxplot() +
  facet_wrap(~continent, ncol = 4) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1) # slanted tick labels
  )