# Load the data set from the Excel workbook, then print a preview of it.
data <- read_excel(path = "../00_data/myData.xlsx")
print(data)
## # A tibble: 49,384 × 25
## player season rank date game_…¹ age team at opp
## <chr> <dbl> <dbl> <dttm> <dbl> <chr> <chr> <chr> <chr>
## 1 Alex Ovechk… 2006 1 2005-10-05 00:00:00 1 20-0… WSH NA CBJ
## 2 Alex Ovechk… 2006 2 2005-10-07 00:00:00 2 20-0… WSH NA ATL
## 3 Alex Ovechk… 2006 3 2005-10-08 00:00:00 3 20-0… WSH @ ATL
## 4 Alex Ovechk… 2006 4 2005-10-10 00:00:00 4 20-0… WSH NA NYR
## 5 Alex Ovechk… 2006 5 2005-10-12 00:00:00 5 20-0… WSH @ CAR
## 6 Alex Ovechk… 2006 6 2005-10-13 00:00:00 6 20-0… WSH NA NYI
## 7 Alex Ovechk… 2006 7 2005-10-16 00:00:00 7 20-0… WSH NA TBL
## 8 Alex Ovechk… 2006 8 2005-10-20 00:00:00 8 20-0… WSH @ FLA
## 9 Alex Ovechk… 2006 9 2005-10-22 00:00:00 9 20-0… WSH NA CAR
## 10 Alex Ovechk… 2006 10 2005-10-26 00:00:00 10 20-0… WSH @ BUF
## # … with 49,374 more rows, 16 more variables: location <chr>, outcome <chr>,
## # goals <dbl>, assists <dbl>, points <dbl>, plus_minus <dbl>,
## # penalty_min <dbl>, goals_even <dbl>, goals_powerplay <dbl>,
## # goals_short <dbl>, goals_gamewinner <dbl>, assists_even <chr>,
## # assists_powerplay <chr>, assists_short <chr>, shots <dbl>,
## # shot_percent <chr>, and abbreviated variable name ¹​game_num
Introduction
Questions
Variation
# Scatter plot of goals against date for the first 31 rows of `data`.
ggplot(head(data, n = 31), aes(x = date, y = goals)) +
  geom_point()

Visualizing distributions
# Histogram of goals for the first 31 rows, using bins of width 1.
head(data, 31) %>%
  ggplot() +
  geom_histogram(aes(x = goals), binwidth = 1)

Typical values
# Frequency polygon of goals (bin width 1) for the first 31 rows.
ggplot(head(data, n = 31), aes(x = goals)) +
  geom_freqpoly(binwidth = 1)

Unusual values
# Histogram of assists for the first 31 rows, using narrow bins of width 0.1.
head(data, 31) %>%
  ggplot(mapping = aes(x = assists)) +
  geom_histogram(binwidth = 0.1)

Missing values
Covariation
# Frequency polygons of goals for the first 31 rows, one line per outcome.
# Mapping colour to `outcome` (a character column) lets the plot show how the
# distribution of goals varies with a second variable; without that mapping
# only a single variable is displayed and no covariation is visible.
ggplot(data = head(data, 31), mapping = aes(x = goals, colour = outcome)) +
  geom_freqpoly(binwidth = .5)

A categorical and continuous variable
# Box plot of goals, split by outcome.
# geom_boxplot() needs a discrete x to draw one box per group: `outcome` is a
# character column, so it is treated as categorical. A continuous x such as
# `date` yields a single box and a "Continuous x aesthetic" warning.
data %>%
  ggplot(aes(x = outcome, y = goals)) +
  geom_boxplot()

Two categorical variables
# Number of rows for each location/outcome combination.
# Both columns are character vectors, so ggplot2 treats them as categorical;
# geom_count() sizes each point by how many rows share that combination.
data %>%
  ggplot(aes(x = location, y = outcome)) +
  geom_count()

Two continuous variables
# Goals plotted against date as points, restricted to the first 31 rows.
head(data, 31) %>%
  ggplot(aes(x = date, y = goals)) +
  geom_point()

Patterns and models
# Frequency polygon of goals for the first 31 rows, with bins of width 0.25.
ggplot(head(data, n = 31)) +
  geom_freqpoly(aes(x = goals), binwidth = 0.25)
