# Load the data
library(datasets)
library(ggplot2)
# Bring the built-in `airquality` data frame into the workspace.
data("airquality")
# View(airquality)  # uncomment to inspect the data frame interactively

# Turn Month into a factor: its distinct values, taken in sorted order,
# receive the labels May through Sep, so plots treat it as a discrete axis.
airquality[["Month"]] <- factor(
  airquality[["Month"]],
  labels = c("May", "Jun", "Jul", "Aug", "Sep")
)
# Box plot
# Baseline plot: one default box per month showing the Ozone distribution.
ggplot(data = airquality, mapping = aes(x = Month, y = Ozone)) +
  geom_boxplot()
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).

# Palette used for the styled boxplots: `fill` colours the box interior,
# `line` colours the box outline and the outlier markers.
fill <- "#4271AE"
line <- "#1F3552"

# Same boxplot as before, now with a semi-transparent fill and outliers
# drawn with point shape 8.
ggplot(data = airquality, mapping = aes(x = Month, y = Ozone)) +
  geom_boxplot(
    fill = fill,
    colour = line,
    alpha = 0.7,
    outlier.colour = line,
    outlier.shape = 8
  )
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).

# notch = TRUE: cuts a notch into each box around its median
# geom_jitter(): overlays the individual data points
# outlier.shape = NA: hides the boxplot's own outlier markers so those points
#   are not drawn twice (geom_jitter() already plots every observation)
ggplot(data = airquality, mapping = aes(x = Month, y = Ozone)) +
  geom_boxplot(
    fill = fill,
    colour = line,
    alpha = 0.7,
    notch = TRUE,
    outlier.shape = NA
  ) +
  # alternative: geom_point(alpha = .4) puts all points of a month on one
  # vertical line
  geom_jitter(shape = 20, alpha = 0.5) +
  scale_x_discrete(name = "Month") +
  scale_y_continuous(
    name = "Mean ozone in\nparts per billion",
    limits = c(0, 175),
    breaks = seq(from = 0, to = 175, by = 25)
  ) +
  ggtitle("Boxplot of mean ozone by month") +
  theme_bw()
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
## notch went outside hinges. Try setting notch=FALSE.
## Warning: Removed 37 rows containing missing values (geom_point).

# Plot 2: temperature high/low
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.3 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Keep only the rows for July, August and September.
months <- c("Jul", "Aug", "Sep")
airquality_trimmed <- filter(airquality, Month %in% months)

# New binary variable Temp.f: "High Temp" when Temp is above the mean Temp of
# the trimmed data (coded 1), "Low Temp" otherwise (coded 0).
airquality_trimmed$Temp.f <- with(
  airquality_trimmed,
  factor(as.numeric(Temp > mean(Temp)), labels = c("Low Temp", "High Temp"))
)
library(RColorBrewer)
# scale_fill_brewer() and scale_color_brewer() depend on package
# "RColorBrewer" for coloring.
#
# Ozone by month, with one box per temperature group (Temp.f) placed side by
# side within each month, and the raw observations overlaid.
ggplot(airquality_trimmed, aes(x = Month, y = Ozone, fill = Temp.f)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Jitter AND dodge the points so each one sits over the box of its own
  # temperature group. Plain geom_jitter() only jitters, which scatters both
  # groups across the whole month slot instead of lining them up with the
  # dodged boxes.
  geom_point(
    aes(color = Temp.f),
    position = position_jitterdodge(),
    shape = 20,
    alpha = 0.5
  ) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") +
  theme_bw()
## Warning: Removed 11 rows containing non-finite values (stat_boxplot).
## Warning: Removed 11 rows containing missing values (geom_point).
