Variation
Visualizing distributions
# Bar chart: number of Batting rows for each teamID.
ggplot(data = Batting, mapping = aes(x = teamID)) +
  geom_bar()

# Histogram of H over all Batting rows, with bins 100 wide.
ggplot(Batting, aes(x = H)) +
  geom_histogram(binwidth = 100)

# Histogram of H (bin width 100), keeping only rows with X2B below 100.
Batting %>%
  filter(X2B < 100) %>%
  ggplot(mapping = aes(x = H)) +
  geom_histogram(binwidth = 100)

# Frequency polygon of H using the default binning.
# H is already on the x axis; also mapping it to colour added nothing, because
# stat_bin() cannot carry a continuous colour through binning (recent ggplot2
# versions warn that the aesthetic was dropped), so that mapping is removed.
Batting %>%
  ggplot(aes(x = H)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values
# Histogram of X2B (bin width 50) for rows where X2B is positive.
Batting %>%
  # drop rows whose X2B is zero (rows with a missing X2B are dropped too)
  filter(X2B > 0) %>%
  # plot the remaining X2B values
  ggplot(mapping = aes(x = X2B)) +
  geom_histogram(binwidth = 50)

# Histogram of HR with bins 10 wide.
ggplot(data = Batting, mapping = aes(x = HR)) +
  geom_histogram(binwidth = 10)

Unusual values
# Histogram of RBI using geom_histogram()'s default binning.
ggplot(data = Batting, mapping = aes(x = RBI)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 756 rows containing non-finite values (stat_bin).

# Same RBI histogram, zoomed so the y axis shows only counts from 50 to 100.
# coord_cartesian() changes the visible window without discarding any data.
Batting %>%
  ggplot(mapping = aes(x = RBI)) +
  geom_histogram() +
  coord_cartesian(ylim = c(50, 100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 756 rows containing non-finite values (stat_bin).

Covariation
A categorical and continuous variable
# Box plots of HR across bins of AB.
# AB is numeric, so a bare geom_boxplot() draws a single box and warns
# "Continuous x aesthetic -- did you forget aes(group=...)?". Binning AB with
# cut_width() supplies the grouping: one box per 100-wide AB interval.
Batting %>%
  ggplot(aes(x = AB, y = HR)) +
  geom_boxplot(aes(group = cut_width(AB, 100)))

Two categorical variables
# Heat map of how often each (AB, X3B) pair occurs; fill shows the count n.
Batting %>%
  count(AB, X3B) %>%
  ggplot(mapping = aes(x = AB, y = X3B, fill = n)) +
  geom_tile()

Two continuous variables
library(hexbin)
# Hexagonal-bin plot of HR against AB.
ggplot(data = Batting, mapping = aes(x = AB, y = HR)) +
  geom_hex()

# Box plots of RBI across bins of AB, for rows with HR below 10.
# The layer previously used aes(HR, 20), which positionally overrides x with
# HR and y with the constant 20, so RBI was never drawn. Grouping by 20-wide
# AB intervals keeps the plot's x/y mapping and gives one box per interval.
Batting %>%
  filter(HR < 10) %>%
  ggplot(aes(x = AB, y = RBI)) +
  geom_boxplot(aes(group = cut_width(AB, 20)))
