# Read the TidyTuesday (2020-02-04) games table directly from GitHub.
games <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2020/2020-02-04/games.csv"
)
## Rows: 5324 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): week, home_team, away_team, winner, tie, day, date, home_team_nam...
## dbl (7): year, pts_win, pts_loss, yds_win, turnovers_win, yds_loss, turnov...
## time (1): time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number of games recorded for each value of `day`.
count(games, day)
## # A tibble: 7 × 2
## day n
## <chr> <int>
## 1 Fri 3
## 2 Mon 339
## 3 Sat 178
## 4 Sun 4588
## 5 Thu 214
## 6 Tue 1
## 7 Wed 1
# Explicit level order for `day`: most frequent day first, rarest last
# (matches the descending game counts printed above).
day_levels <- c("Sun", "Mon", "Thu", "Sat", "Fri", "Tue", "Wed")

# Copy of `games` in which `day` is a factor using the levels above.
games_rev <- mutate(games, day = factor(day, levels = day_levels))
Make two bar charts here: one before reordering the factor levels and another after.
# Unordered: mean of `pts_win` per day (missing values ignored), with the
# days kept in their existing factor-level order.
pts_win_summary <- summarise(
  group_by(games_rev, day),
  avg_pts_win = mean(pts_win, na.rm = TRUE)
)
pts_win_summary
## # A tibble: 7 × 2
## day avg_pts_win
## <fct> <dbl>
## 1 Sun 27.7
## 2 Mon 28.9
## 3 Thu 28.3
## 4 Sat 28.1
## 5 Fri 35.3
## 6 Tue 24
## 7 Wed 24
# Bar chart of avg_pts_win with days in their existing factor-level order.
# geom_col() draws bars whose heights are the values already computed in
# pts_win_summary (the task asks for bar charts, not points).
pts_win_summary %>%
  ggplot(aes(x = day, y = avg_pts_win)) +
  geom_col()

# Reordered: same bar chart, but the levels of `day` are first sorted by
# increasing avg_pts_win. Reordering inside mutate() keeps the x-axis
# label as "day" rather than the full fct_reorder() expression.
pts_win_summary %>%
  mutate(day = fct_reorder(day, avg_pts_win)) %>%
  ggplot(aes(x = day, y = avg_pts_win)) +
  geom_col()
Show examples of three forcats functions for modifying factor levels (recoding, collapsing, and lumping):
# fct_recode(): rename each abbreviated day to its full name
# (new name on the left, existing level on the right), then count games
# per day, most frequent first, printing every row.
games %>%
  mutate(
    day = fct_recode(
      day,
      Sunday = "Sun",
      Monday = "Mon",
      Tuesday = "Tue",
      Wednesday = "Wed",
      Thursday = "Thu",
      Friday = "Fri",
      Saturday = "Sat"
    )
  ) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 7 × 2
## day n
## <fct> <int>
## 1 Sunday 4588
## 2 Monday 339
## 3 Thursday 214
## 4 Saturday 178
## 5 Friday 3
## 6 Tuesday 1
## 7 Wednesday 1
# fct_collapse(): merge the three rarely used days into a single "other"
# level and give the remaining days their full names, then count games
# per resulting level, most frequent first.
games %>%
  mutate(
    day = fct_collapse(
      day,
      Sunday = "Sun",
      Monday = "Mon",
      Thursday = "Thu",
      Saturday = "Sat",
      other = c("Fri", "Tue", "Wed")
    )
  ) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 5 × 2
## day n
## <fct> <int>
## 1 Sunday 4588
## 2 Monday 339
## 3 Thursday 214
## 4 Saturday 178
## 5 other 5
# fct_lump_n(): keep the 3 most frequent days and lump every other day
# into "Other", then count games per resulting level, most frequent first.
# fct_lump_n() is the explicit replacement for the superseded generic
# fct_lump(); with `n = 3` it produces the same result.
games %>%
  mutate(day = fct_lump_n(day, n = 3)) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 4 × 2
## day n
## <fct> <int>
## 1 Sun 4588
## 2 Mon 339
## 3 Thu 214
## 4 Other 183
No need to do anything here.