Introduction

Questions

Variation

# Bar chart: number of rows in myData for each team.
# The x-axis tick labels are rotated 90 degrees.
ggplot(myData, aes(x = team)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90))

Visualizing distributions

# Histogram of `total` using the default binning.
ggplot(myData, aes(x = total)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of `total`, one coloured line per team.
ggplot(myData) +
  geom_freqpoly(aes(x = total, colour = team))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values

# Histogram of `total` (default binning) to look for typical values.
ggplot(myData) +
  geom_histogram(aes(x = total))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Unusual values

# Histogram of `total` with the visible y range limited to 0-50 so that
# bins with very few observations can be seen.
ggplot(myData, aes(x = total)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing Values

I set the cut-off values to lie outside the range of my dataset because, even though I have outliers, they are not missing or incorrect values.

# Copy of myData in which any `total` below 60000 or above 150000 is
# replaced with NA (rows are kept, only the value is blanked).
# The condition is on `total`, so `total` is the column that is overwritten.
myData2 <- myData %>%
  mutate(total = ifelse(total < 60000 | total > 150000, NA, total))

Covariation

A categorical and continuous variable

# Boxplot of `total` for each team.
ggplot(myData) +
  geom_boxplot(aes(x = team, y = total))

Two categorical variables

# Count the rows for every (total, team) pair, then draw one point per pair
# coloured by that count.
# The data is already one row per pair after count(), so a plain point layer
# is used. `colour` (not `fill`) carries the count because the default point
# shape has no fill.
myData %>%
  count(total, team) %>%
  ggplot(mapping = aes(x = total, y = team)) +
  geom_point(mapping = aes(colour = n))

Two continuous variables

library(hexbin)

# Scatter of `total` against team with nearly transparent points
# (alpha = 1/100) so that overplotted regions show up darker.
ggplot(myData, aes(x = team, y = total)) +
  geom_point(alpha = 1 / 100) +
  theme(axis.text.x = element_text(angle = 90))

# Hexagonal binning of `total` against team; x-axis labels rotated 90 degrees.
ggplot(myData, aes(x = team, y = total)) +
  geom_hex() +
  theme(axis.text.x = element_text(angle = 90))

Patterns and models

library(modelr)

# Linear model of log(total) explained by team.
# A response cannot also be its own predictor: R drops such a term and fits
# an intercept-only model.
# NOTE(review): `team` is the only other column used in this analysis --
# confirm it is the intended predictor.
mod <- lm(log(total) ~ team, data = myData)

# Attach the residuals of `mod` to myData as `resid`, then exponentiate them
# (the model's response is log(total), so this undoes the log).
myData2 <- mutate(
  add_residuals(myData, mod),
  resid = exp(resid)
)

# Plot the back-transformed model residuals for each team, i.e. the `resid`
# column that was added to myData2. The x-axis labels are rotated 90 degrees.
ggplot(data = myData2) +
  geom_point(mapping = aes(x = team, y = resid)) +
  theme(axis.text.x = element_text(angle = 90))