Import data

# Read the NHLDATA Excel workbook (relative path) into `data`.
data <- read_excel(path = "../00_data/NHLDATA.xlsx")

Introduction

Questions

Variation

Visualizing distributions

# Bar chart: number of rows for each birth_month value.
ggplot(data, aes(x = birth_month)) +
  geom_bar()

# Histogram of birth_year using bins that are 0.5 units wide.
ggplot(data, aes(x = birth_year)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygons of birth_month, one coloured line per birth_country.
# No binwidth is supplied, so stat_bin() uses its default of 30 bins.
ggplot(data, aes(x = birth_month)) +
  geom_freqpoly(aes(colour = birth_country))

Typical values

# Histogram of birth_year with 0.25-wide bins to show which values are most
# common. The argument has to be spelled `binwidth`: ggplot2 discards an
# unrecognised argument name with a warning and falls back to 30 default bins.
data %>%
  ggplot(aes(x = birth_year)) +
  geom_histogram(binwidth = 0.25)

Unusual values

# Histogram of birth_month with the default binning (30 bins).
ggplot(data = data, mapping = aes(x = birth_month)) +
  geom_histogram()

# Histogram of birth_year (default binning), zoomed to counts from 500 to 800.
# coord_cartesian() only changes the visible window; no rows are dropped.
ggplot(data = data, mapping = aes(x = birth_year)) +
  geom_histogram() +
  coord_cartesian(ylim = c(500, 800))

Missing values

# Scatter plot of birth_month against birth_country. geom_point() drops rows
# with a missing value in either column and reports how many in a warning.
data %>%
  ggplot(aes(x = birth_month, y = birth_country)) +
  geom_point()

Covariation

A categorical and continuous variable

# Frequency polygons of birth_year, one coloured line per birth_country.
# No binwidth is supplied, so stat_bin() uses its default of 30 bins.
ggplot(data, aes(x = birth_year, colour = birth_country)) +
  geom_freqpoly()