Variation
Visualizing distributions
# Bar chart: one bar per `months` category, bar height = number of rows.
bee_colonies %>%
  ggplot(aes(x = months)) +
  geom_bar()

# Tabulate the number of rows in each `months` category.
count(bee_colonies, months)
## # A tibble: 4 × 2
## months n
## <chr> <int>
## 1 April-June 329
## 2 January-March 329
## 3 July-September 282
## 4 October-December 282
# Histogram of `colony_lost`, using the default binning of geom_histogram().
bee_colonies %>%
  ggplot(aes(x = colony_lost)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

# Unusual values
# Same histogram of `colony_lost`, but with the y-axis zoomed to 0-50 so
# that bins with very few observations become visible. coord_cartesian()
# only changes the visible window; the binned data is left untouched.
ggplot(data = bee_colonies, mapping = aes(x = colony_lost)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

# Frequency polygons of `colony_lost`, one coloured line per `months` value.
bee_colonies %>%
  ggplot(aes(x = colony_lost, colour = months)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

A categorical and continuous variable
# Boxplots of `colony_lost_pct`, one box per `months` category.
ggplot(
  data = bee_colonies,
  mapping = aes(x = months, y = colony_lost_pct)
) +
  geom_boxplot()
## Warning: Removed 54 rows containing non-finite values (`stat_boxplot()`).

Two categorical variables
# Heatmap of row counts: one tile per (`months`, `state`) combination,
# filled by the number of rows `n` produced by count().
ggplot(
  data = count(bee_colonies, months, state),
  mapping = aes(x = months, y = state, fill = n)
) +
  geom_tile()

Two continuous variables
# Scatterplot of `colony_lost` against `colony_size`.
# Transparency is used to reveal overplotting. The data set has only
# 1,222 rows, so alpha = 1/100 (100 stacked points needed for a fully
# opaque mark) leaves isolated points practically invisible; 1/10 keeps
# dense regions distinguishable while sparse points stay readable.
bee_colonies %>%
  ggplot(aes(x = colony_size, y = colony_lost)) +
  geom_point(alpha = 1 / 10)
## Warning: Removed 47 rows containing missing values (`geom_point()`).

library(hexbin)
# Hexagonal 2-d binning of `colony_size` against `colony_lost`.
ggplot(
  data = bee_colonies,
  mapping = aes(x = colony_size, y = colony_lost)
) +
  geom_hex()
## Warning: Removed 47 rows containing non-finite values (`stat_binhex()`).

Patterns and models
library(modelr)
# Fit a linear model of log(colony_lost) on log(colony_size).
mod <- lm(log(colony_lost) ~ log(colony_size), data = bee_colonies)

# Append the model residuals as `resid`, then exponentiate them to undo
# the log transform applied to the response.
bee_colonies2 <- mutate(
  modelr::add_residuals(bee_colonies, mod),
  resid = exp(resid)
)

# Exponentiated residuals plotted against colony size.
ggplot(data = bee_colonies2, mapping = aes(x = colony_size, y = resid)) +
  geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).

# Boxplots of the exponentiated residuals per `months` category, with the
# y-axis zoomed to 0-25 (a view change only; no rows are dropped by it).
ggplot(data = bee_colonies2, mapping = aes(x = months, y = resid)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 25))
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
