Import your data

# Read the marbles dataset from the TidyTuesday repository (2020-06-02).
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-02/marbles.csv"
)
## Rows: 256 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): date, race, site, source, marble_name, team_name, pole, host, notes
## dbl (5): time_s, points, track_length_m, number_laps, avg_time_lap
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tally the number of rows recorded at each site.
data %>%
  count(site)
## # A tibble: 8 × 2
##   site                n
##   <chr>           <int>
## 1 Greenstone         32
## 2 Hivedrive          32
## 3 Midnight Bay       32
## 4 Momotorway         32
## 5 O'raceway          32
## 6 Razzway            32
## 7 Savage Speedway    32
## 8 Short Circuit      32
# Site names arranged from the shortest name to the longest.
site_name_length_order <- c(
  "Razzway",
  "Hivedrive",
  "O'raceway",
  "Greenstone",
  "Momotorway",
  "Midnight Bay",
  "Short Circuit",
  "Savage Speedway"
)

# Turn `site` into a factor whose levels follow that order.
data_rev <- data %>%
  mutate(site = factor(site, levels = site_name_length_order))

Modify factor order

Make two bar charts here: one before reordering the factor and another after.

# Average lap time per site; rows come out in the factor level order of `site`.
# Missing lap times are dropped before averaging.
data_summary <- data_rev %>%
  group_by(site) %>%
  summarise(
    avg_time_lap = mean(avg_time_lap, na.rm = TRUE),
    .groups = "drop"
  )

data_summary
## # A tibble: 8 × 2
##   site            avg_time_lap
##   <fct>                  <dbl>
## 1 Razzway                 31.6
## 2 Hivedrive               28.8
## 3 O'raceway               34.2
## 4 Greenstone              38.1
## 5 Momotorway              24.3
## 6 Midnight Bay            27.8
## 7 Short Circuit           21.3
## 8 Savage Speedway         31.3
# Before reordering: bars follow the manually specified factor level order.
ggplot(data_summary, aes(avg_time_lap, site)) +
  geom_col()

# After reordering: fct_reorder() sorts the sites by their average lap time.
# The y-axis title is set explicitly so it does not show the fct_reorder() call.
ggplot(data_summary, aes(avg_time_lap, fct_reorder(site, avg_time_lap))) +
  geom_col() +
  labs(y = "site")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename each level by hand (new name = old name).
# Here every site name gets a "Track" suffix.
data %>%
  mutate(
    site = fct_recode(
      site,
      "GreenstoneTrack" = "Greenstone",
      "HivedriveTrack" = "Hivedrive",
      "Midnight BayTrack" = "Midnight Bay",
      "MomotorwayTrack" = "Momotorway",
      "O'racewayTrack" = "O'raceway",
      "RazzwayTrack" = "Razzway",
      "Savage SpeedwayTrack" = "Savage Speedway",
      "Short CircuitTrack" = "Short Circuit"
    )
  ) %>%
  count(site)
## # A tibble: 8 × 2
##   site                     n
##   <fct>                <int>
## 1 GreenstoneTrack         32
## 2 HivedriveTrack          32
## 3 Midnight BayTrack       32
## 4 MomotorwayTrack         32
## 5 O'racewayTrack          32
## 6 RazzwayTrack            32
## 7 Savage SpeedwayTrack    32
## 8 Short CircuitTrack      32
  • fct_collapse
# fct_collapse(): merge several levels into one.
# Razzway stays on its own (renamed); the other seven sites become "Other".
data %>%
  mutate(
    site = fct_collapse(
      site,
      RazzwayTrack = "Razzway",
      Other = c(
        "Hivedrive", "Greenstone", "Midnight Bay", "Momotorway",
        "O'raceway", "Savage Speedway", "Short Circuit"
      )
    )
  ) %>%
  count(site)
## # A tibble: 2 × 2
##   site             n
##   <fct>        <int>
## 1 Other          224
## 2 RazzwayTrack    32
  • fct_lump
# fct_lump_n(): keep the n most frequent levels and lump the rest into "Other".
# Every site occurs 32 times, so a bare fct_lump(site) finds nothing to lump.
# ties.method = "first" breaks the eight-way tie by level order, so the first
# three sites alphabetically are kept and the remaining five are lumped.
data %>%
  mutate(site = fct_lump_n(site, n = 3, ties.method = "first")) %>%
  count(site)
## # A tibble: 4 × 2
##   site             n
##   <fct>        <int>
## 1 Greenstone      32
## 2 Hivedrive       32
## 3 Midnight Bay    32
## 4 Other          160

Chapter 16

No need to do anything here.