Import your data

# Read the TidyTuesday 2020-02-04 `games.csv` file directly from GitHub.
games <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2020/2020-02-04/games.csv"
)
## Rows: 5324 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): week, home_team, away_team, winner, tie, day, date, home_team_nam...
## dbl   (7): year, pts_win, pts_loss, yds_win, turnovers_win, yds_loss, turnov...
## time  (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tally the number of games for each value of `day`.
games %>%
  count(day)
## # A tibble: 7 × 2
##   day       n
##   <chr> <int>
## 1 Fri       3
## 2 Mon     339
## 3 Sat     178
## 4 Sun    4588
## 5 Thu     214
## 6 Tue       1
## 7 Wed       1
# Explicit level order for `day`: most frequent day first, per the counts above.
day_levels <- c("Sun", "Mon", "Thu", "Sat", "Fri", "Tue", "Wed")

# Copy of `games` in which `day` is a factor using the levels declared above.
games_rev <- games %>%
  mutate(day = factor(day, levels = day_levels))

Modify factor order

Make two bar charts here: one before reordering the factor and another after.

# Unordered: mean winning score (`pts_win`) for each day, ignoring missing
# values. Rows follow the factor's level order, not the size of the average.
pts_win_summary <- games_rev %>%
  group_by(day) %>%
  summarise(avg_pts_win = mean(pts_win, na.rm = TRUE))

pts_win_summary
## # A tibble: 7 × 2
##   day   avg_pts_win
##   <fct>       <dbl>
## 1 Sun          27.7
## 2 Mon          28.9
## 3 Thu          28.3
## 4 Sat          28.1
## 5 Fri          35.3
## 6 Tue          24  
## 7 Wed          24
# Unordered: bar chart of the average winning score per day, with the bars in
# the factor's existing level order.
pts_win_summary %>%
  ggplot(aes(x = day, y = avg_pts_win)) +
  geom_col()

# Reordered: the same bar chart after `fct_reorder()` sorts the `day` levels by
# `avg_pts_win`, so the bars rise from left to right. The reordering happens in
# `mutate()` rather than inside `aes()` so the x-axis label stays `day`.
pts_win_summary %>%
  mutate(day = fct_reorder(day, avg_pts_win)) %>%
  ggplot(aes(x = day, y = avg_pts_win)) +
  geom_col()

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename each abbreviated day to its full name
# (new name on the left, existing level on the right), then list the
# counts from largest to smallest.
games %>%
  mutate(
    day = fct_recode(
      day,
      "Friday"    = "Fri",
      "Monday"    = "Mon",
      "Saturday"  = "Sat",
      "Sunday"    = "Sun",
      "Thursday"  = "Thu",
      "Tuesday"   = "Tue",
      "Wednesday" = "Wed"
    )
  ) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 7 × 2
##   day           n
##   <fct>     <int>
## 1 Sunday     4588
## 2 Monday      339
## 3 Thursday    214
## 4 Saturday    178
## 5 Friday        3
## 6 Tuesday       1
## 7 Wednesday     1
  • fct_collapse
# fct_collapse(): merge the three rarely used days into a single `other`
# level and give the remaining days their full names, then list the counts
# from largest to smallest.
games %>%
  mutate(
    day = fct_collapse(
      day,
      other    = c("Fri", "Tue", "Wed"),
      Sunday   = "Sun",
      Thursday = "Thu",
      Monday   = "Mon",
      Saturday = "Sat"
    )
  ) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 5 × 2
##   day          n
##   <fct>    <int>
## 1 Sunday    4588
## 2 Monday     339
## 3 Thursday   214
## 4 Saturday   178
## 5 other        5
  • fct_lump
# fct_lump(): keep the three most common days and lump every other day
# together, then list the counts from largest to smallest.
games %>%
  mutate(day = day %>% fct_lump(n = 3)) %>%
  count(day, sort = TRUE) %>%
  print(n = Inf)
## # A tibble: 4 × 2
##   day       n
##   <fct> <int>
## 1 Sun    4588
## 2 Mon     339
## 3 Thu     214
## 4 Other   183

Chapter 16

No need to do anything here.