Import data
# Read the NHL workbook into `data`; the path is relative to this file's
# working directory.
data <- read_excel(path = "../00_data/NHLDATA.xlsx")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows for each value of birth_month.
ggplot(data, aes(x = birth_month)) +
  geom_bar()

# Histogram of birth_year using bins that are 0.5 wide.
ggplot(data, aes(x = birth_year)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygon of birth_month, with line colour mapped to birth_country.
ggplot(data) +
  geom_freqpoly(aes(x = birth_month, colour = birth_country))

Typical values
# Histogram of birth_year with 0.25-wide bins, to show which values are
# most common. The argument name must be exactly `binwidth`: ggplot2 drops
# unrecognised layer arguments with a warning and falls back to 30 bins.
data %>%
  ggplot(aes(x = birth_year)) +
  geom_histogram(binwidth = 0.25)

Unusual values
# Histogram of birth_month with ggplot2's default binning (no bin width
# is specified here).
ggplot(data, aes(x = birth_month)) +
  geom_histogram()

# Histogram of birth_year with the y-axis view restricted to counts between
# 500 and 800. coord_cartesian() only zooms the view; the bins are still
# computed from all rows.
ggplot(data, aes(x = birth_year)) +
  geom_histogram() +
  coord_cartesian(ylim = c(500, 800))

Missing values
# Scatter plot of birth_country against birth_month. No column is
# transformed before plotting; geom_point() leaves out rows where either
# variable is NA and reports that in a warning.
data %>%
  ggplot(aes(x = birth_month, y = birth_country)) +
  geom_point()

Covariation
A categorical and continuous variable
# Frequency polygon of birth_year (continuous), with line colour mapped to
# birth_country (categorical).
ggplot(data, aes(x = birth_year, colour = birth_country)) +
  geom_freqpoly()
