# Download the TidyTuesday marbles dataset (2020-06-02) straight from GitHub
# and keep it as a tibble named `data`.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-02/marbles.csv"
)
## Rows: 256 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): date, race, site, source, marble_name, team_name, pole, host, notes
## dbl (5): time_s, points, track_length_m, number_laps, avg_time_lap
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally the number of rows recorded for each site.
count(data, site)
## # A tibble: 8 × 2
## site n
## <chr> <int>
## 1 Greenstone 32
## 2 Hivedrive 32
## 3 Midnight Bay 32
## 4 Momotorway 32
## 5 O'raceway 32
## 6 Razzway 32
## 7 Savage Speedway 32
## 8 Short Circuit 32
# Site names listed from the shortest name to the longest; this vector is the
# explicit level order for the `site` factor built below.
site_name_length_order <- c(
  "Razzway",
  "Hivedrive",
  "O'raceway",
  "Greenstone",
  "Momotorway",
  "Midnight Bay",
  "Short Circuit",
  "Savage Speedway"
)

# Copy of `data` in which `site` is a factor whose levels follow
# `site_name_length_order` instead of the default alphabetical order.
data_rev <- mutate(
  data,
  site = factor(site, levels = site_name_length_order)
)
Make two bar charts here: one before ordering and another after.
# One row per site holding the mean of `avg_time_lap`; missing lap times are
# dropped before averaging. Rows come out in the factor-level order of `site`.
data_summary <- summarise(
  group_by(data_rev, site),
  avg_time_lap = mean(avg_time_lap, na.rm = TRUE)
)

# Print the summary table.
data_summary
## # A tibble: 8 × 2
## site avg_time_lap
## <fct> <dbl>
## 1 Razzway 31.6
## 2 Hivedrive 28.8
## 3 O'raceway 34.2
## 4 Greenstone 38.1
## 5 Momotorway 24.3
## 6 Midnight Bay 27.8
## 7 Short Circuit 21.3
## 8 Savage Speedway 31.3
# Bar chart BEFORE reordering: sites appear in the factor-level order already
# stored in `data_summary$site`. `geom_col()` draws one bar per row whose
# length is the value of `avg_time_lap` (the exercise asks for bar charts, so
# bars are used rather than points).
ggplot(data_summary, aes(avg_time_lap, site)) +
  geom_col()

# Bar chart AFTER reordering: `fct_reorder()` sorts the sites by their mean lap
# time so the bars are ranked. The y-axis label is set explicitly because the
# default label would be the raw expression `fct_reorder(site, avg_time_lap)`.
ggplot(data_summary, aes(avg_time_lap, fct_reorder(site, avg_time_lap))) +
  geom_col() +
  labs(y = "site")
Show examples of three functions:
# fct_recode(): rename individual levels by hand (new name = "old name").
# Every site name gets a "Track" suffix; the row counts per site are then
# tallied to show the renamed levels.
data %>%
  mutate(
    site = fct_recode(
      site,
      RazzwayTrack = "Razzway",
      HivedriveTrack = "Hivedrive",
      `O'racewayTrack` = "O'raceway",
      GreenstoneTrack = "Greenstone",
      MomotorwayTrack = "Momotorway",
      `Midnight BayTrack` = "Midnight Bay",
      `Short CircuitTrack` = "Short Circuit",
      `Savage SpeedwayTrack` = "Savage Speedway"
    )
  ) %>%
  count(site)
## # A tibble: 8 × 2
## site n
## <fct> <int>
## 1 GreenstoneTrack 32
## 2 HivedriveTrack 32
## 3 Midnight BayTrack 32
## 4 MomotorwayTrack 32
## 5 O'racewayTrack 32
## 6 RazzwayTrack 32
## 7 Savage SpeedwayTrack 32
## 8 Short CircuitTrack 32
# fct_collapse(): merge several existing levels into one new level.
# "Razzway" is renamed to "RazzwayTrack" and the seven remaining sites are
# folded into a single "Other" level before counting rows per level.
data %>%
  mutate(
    site = fct_collapse(
      site,
      RazzwayTrack = "Razzway",
      Other = c(
        "Hivedrive",
        "Greenstone",
        "Midnight Bay",
        "Momotorway",
        "O'raceway",
        "Savage Speedway",
        "Short Circuit"
      )
    )
  ) %>%
  count(site)
## # A tibble: 2 × 2
## site n
## <fct> <int>
## 1 Other 224
## 2 RazzwayTrack 32
# fct_lump_lowfreq(): lump the least frequent levels into "Other" while
# keeping "Other" the smallest level. This is what the legacy `fct_lump()`
# dispatches to when neither `n` nor `prop` is given; calling it directly
# makes the lumping rule explicit.
# Every site has the same number of rows in this data (32 each), so no level
# is lumped and all eight sites are returned unchanged.
data %>%
  mutate(site = fct_lump_lowfreq(site)) %>%
  count(site)
## # A tibble: 8 × 2
## site n
## <fct> <int>
## 1 Greenstone 32
## 2 Hivedrive 32
## 3 Midnight Bay 32
## 4 Momotorway 32
## 5 O'raceway 32
## 6 Razzway 32
## 7 Savage Speedway 32
## 8 Short Circuit 32
No need to do anything here.