Import data

# Read the workbook "myData.xlsx" into `data`, then auto-print the result
# so the imported table can be inspected.
data <- read_excel(path = "myData.xlsx")
data

Introduction

Questions

How many teams have a similar PAKE and PASE rank?

## Variation

Visualizing distributions

# Bar chart of PAKE: one bar per distinct PAKE value, with the bar height
# giving the number of rows that share that value.
ggplot(data, mapping = aes(x = PAKE)) +
  geom_bar()

# Histogram of PASE using bins that are 0.5 units wide.
ggplot(data, aes(x = PASE)) +
  geom_histogram(binwidth = 0.5)

# Histogram of PAKE (bin width 0.5), restricted to rows where PAKE < 1.
data %>%
  filter(PAKE < 1) %>%
  ggplot(mapping = aes(x = PAKE)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygons of PAKE, one line per WINPERCENT band.
#
# Mapping the raw WINPERCENT column to `color` does not split the data into
# groups, so the binning stat drops the aesthetic and draws a single
# uncoloured line. Cutting WINPERCENT into four equal-width intervals gives
# ggplot2 a discrete variable to group and colour by.
# NOTE(review): assumes WINPERCENT is numeric -- confirm against the
# imported data.
data %>%
  ggplot(aes(x = PAKE, color = cut_interval(WINPERCENT, n = 4))) +
  geom_freqpoly() +
  # Keep a readable legend title instead of the cut_interval() expression.
  labs(color = "WINPERCENT")

Typical values

# Typical values: keep only rows with PAKE below 1, then draw a
# fine-grained histogram (bins 0.01 wide) to see which values are common.
ggplot(filter(data, PAKE < 1), aes(x = PAKE)) +
  geom_histogram(binwidth = 0.01)

# Histogram of PAKE over all rows. No binwidth is given, so ggplot2's
# default binning is used.
ggplot(data, mapping = aes(x = PAKE)) +
  geom_histogram()

Unusual values

# Full-range histogram of PAKE (default binning), used as the starting
# point for spotting unusual values.
ggplot(data, aes(x = PAKE)) +
  geom_histogram()

# Same PAKE histogram with the y axis zoomed to counts between 0 and 50 so
# that short bars become visible. coord_cartesian() only changes the
# viewport; every row still contributes to the bins.
ggplot(data, aes(x = PAKE)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))

Missing Values

# Scatterplot of WINPERCENT against PAKE, one point per row.
ggplot(data, mapping = aes(x = PAKE, y = WINPERCENT)) +
  geom_point()

Covariation

A categorical and continuous variable

# Boxplots of PAKE for each team.
#
# TEAMID is wrapped in factor() so it is always treated as a categorical
# axis. If the column is imported as a number, using it directly leaves
# both axes continuous and ggplot2 collapses everything into a single box
# with a "Continuous x aesthetic" warning. factor() is harmless if TEAMID
# is already text.
data %>%
  ggplot(aes(x = PAKE, y = factor(TEAMID))) +
  geom_boxplot() +
  # Keep the original axis title rather than "factor(TEAMID)".
  labs(y = "TEAMID")

Two categorical variables

# Count the rows for every (PAKE, WINPERCENT) combination, then draw one
# tile per combination, filled according to that count (`n`).
data %>%
  count(PAKE, WINPERCENT) %>%
  ggplot(mapping = aes(x = PAKE, y = WINPERCENT)) +
  geom_tile(mapping = aes(fill = n))

Two continuous variables

# Hexagonal-bin plot of WINPERCENT against PAKE.
# The hexbin package is attached here, immediately before geom_hex().
library(hexbin)
ggplot(data, mapping = aes(x = PAKE, y = WINPERCENT)) +
  geom_hex()

# For rows with PAKE < 1, slice PAKE into 0.1-wide bins and draw one
# WINPERCENT boxplot per bin.
data %>%
  filter(PAKE < 1) %>%
  ggplot(aes(x = PAKE, y = WINPERCENT, group = cut_width(PAKE, 0.1))) +
  geom_boxplot()

Patterns and models

# Scatterplot of WINPERCENT (y) against PAKE (x) to look for a pattern
# between the two variables.
ggplot(data, mapping = aes(x = PAKE, y = WINPERCENT)) +
  geom_point()

# Boxplots of WINPERCENT across the range of PAKE.
#
# A boxplot needs groups. With a continuous x and no `group` aesthetic,
# ggplot2 warns ("Continuous x aesthetic") and draws a single box for the
# whole data set. PAKE is therefore cut into 0.5-wide bins, with one box
# per bin.
data %>%
  ggplot(aes(PAKE, WINPERCENT)) +
  geom_boxplot(aes(group = cut_width(PAKE, 0.5)))