Introduction
Questions
Variation
# Bar chart: number of rows in myData for each team.
# The x-axis tick labels are rotated 90 degrees.
ggplot(myData, aes(x = team)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90))

Visualizing distributions
# Histogram of `total`; no bins/binwidth is given, so ggplot2 falls back to
# its default binning and prints a message about it.
ggplot(myData, aes(x = total)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of `total`, one coloured line per team, using the
# default binning.
ggplot(myData) +
  geom_freqpoly(aes(x = total, colour = team))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values
# Histogram of `total` with default binning, used to look for typical values.
ggplot(myData) +
  geom_histogram(aes(x = total))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Unusual values
# Histogram of `total`, zoomed to counts between 0 and 50 so that sparsely
# populated bins become visible. coord_cartesian() only zooms the view; it
# does not drop any data before binning.
ggplot(myData, aes(x = total)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing Values
I set the cutoffs to lie outside the range of my dataset because, even though
I have outliers, they are not missing or incorrect values.
# Replace values of `total` that fall outside [60000, 150000] with NA while
# keeping every row. The column being tested and the column being replaced
# must be the same one (`total`); the data used throughout this analysis has
# no `y` column, so referring to `y` here would fail or leave `total` as is.
myData2 <- myData %>%
  mutate(total = ifelse(total < 60000 | total > 150000, NA, total))
Covariation
A categorical and continuous variable
# Box plot of `total` for each team.
ggplot(myData) +
  geom_boxplot(aes(x = team, y = total))

Two categorical variables
# Count how many rows share each (total, team) combination and show that
# count as the fill colour of a tile.
# geom_tile() is used because the data is already aggregated by count():
# geom_count() would re-count the one-row-per-combination table (every point
# would get size 1) and the default point shape ignores `fill`, so the
# computed `n` would never be displayed.
myData %>%
  count(total, team) %>%
  ggplot(mapping = aes(x = total, y = team)) +
  geom_tile(mapping = aes(fill = n))

Two continuous variables
library(hexbin)
# Scatter plot of `total` against `team` with heavily transparent points
# (alpha = 0.01) so that overplotted regions appear darker.
# NOTE(review): `team` looks categorical in the other plots, although this
# section is about two continuous variables - confirm the intended x variable.
ggplot(myData, aes(x = team, y = total)) +
  geom_point(alpha = 0.01) +
  theme(axis.text.x = element_text(angle = 90))

# Hexagonal-bin plot of `total` against `team`; x-axis labels rotated 90
# degrees.
# NOTE(review): geom_hex() is meant for two continuous variables and `team`
# looks categorical elsewhere in this analysis - confirm the intended x.
ggplot(myData, aes(x = team, y = total)) +
  geom_hex() +
  theme(axis.text.x = element_text(angle = 90))

Patterns and models
library(modelr)
# Model log(total) as a function of team, then look at what the model does
# not explain.
# The predictor must differ from the response: with `log(total)` on both
# sides R drops the right-hand term and fits an intercept-only model.
# NOTE(review): `team` is used as the predictor because it is the only other
# variable used in this analysis - confirm this is the intended model.
mod <- lm(log(total) ~ team, data = myData)

# add_residuals() appends a `resid` column (on the log scale); exp() turns it
# back into a multiplicative residual on the original scale of `total`.
myData2 <- myData %>%
  add_residuals(mod) %>%
  mutate(resid = exp(resid))

# Plot the residuals (not the raw `total`) so the chart shows the variation
# that remains after the model's effect has been removed.
ggplot(data = myData2) +
  geom_point(mapping = aes(x = team, y = resid)) +
  theme(axis.text.x = element_text(angle = 90))
