Introduction
# Keep only the rows whose `births` value exceeds 30,000.
filter(data, births > 30000)
## # A tibble: 235 × 3
## year month births
## <dbl> <dbl> <dbl>
## 1 1991 1 32213
## 2 1991 2 30345
## 3 1991 3 34869
## 4 1991 4 35398
## 5 1991 5 36371
## 6 1991 6 34378
## 7 1991 7 35436
## 8 1991 8 34421
## 9 1991 9 34410
## 10 1991 10 33092
## # ℹ 225 more rows
Questions
Variation
Visualizing distributions
# Bar chart of the number of diamonds in each `cut` category.
# The layer must be attached to the plot with `+`: without it, `ggplot()`
# draws an empty canvas and `geom_bar()` is evaluated as a separate
# expression that only prints the layer's description to the console.
diamonds %>%
  ggplot(aes(x = cut)) +
  geom_bar()
# Histogram of `carat` using bins that are 0.5 wide.
# The layer must be attached to the plot with `+`: without it, `ggplot()`
# draws an empty canvas and `geom_histogram()` is evaluated as a separate
# expression that only prints the layer's description to the console.
diamonds %>%
  ggplot(mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.5)
Typical values
# Histogram of `eruptions` from the `faithful` data, with 0.25-wide bins.
ggplot(faithful, aes(x = eruptions)) +
  geom_histogram(binwidth = 0.25)

Unusual values
# Histogram of the `y` variable in `diamonds`, with 0.5-wide bins.
ggplot(data = diamonds) +
  geom_histogram(aes(x = y), binwidth = 0.5)

Missing values
Covariation
A categorical and continuous variable
Two categorical variables
Two continuous variables
Patterns and models