# Read the TidyTuesday (2023-04-25) winners data set straight from GitHub.
winners <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/winners.csv"
)
## Rows: 163 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Category, Athlete, Nationality
## dbl (1): Year
## time (1): Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Level order to impose on the factor (instead of the alphabetical default).
athlete_category <- c("Women", "Wheelchair Women")

# Values to encode.
x1 <- c("Women", "Wheelchair Women")

# Build the factor with the explicit level order, then print it.
y1 <- factor(x = x1, levels = athlete_category)
y1
## [1] Women Wheelchair Women
## Levels: Women Wheelchair Women
# Mean value of Time per Category, ignoring missing times.
average_time_by_category <- winners %>%
  group_by(Category) %>%
  summarise(
    avg_total_time = mean(Time, na.rm = TRUE)
  )

average_time_by_category
## # A tibble: 4 × 2
## Category avg_total_time
## <chr> <drtn>
## 1 Men 7643.767 secs
## 2 Wheelchair Men 6343.000 secs
## 3 Wheelchair Women 7781.564 secs
## 4 Women 8585.167 secs
# Plot the average time for each category as a point.
# avg_total_time is a difftime column, so ggplot2 reports that it is
# defaulting to a continuous scale.
ggplot(
  data = average_time_by_category,
  mapping = aes(x = avg_total_time, y = Category)
) +
  geom_point()
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
Show examples of three forcats functions for modifying factor levels (recoding, collapsing, and lumping):
# fct_recode(): relabel "Wheelchair Women" as "Disabled Women" and show the
# original and recoded labels side by side for the affected rows.
winners %>%
  mutate(
    Cat_rev = fct_recode(Category, "Disabled Women" = "Wheelchair Women")
  ) %>%
  filter(Category == "Wheelchair Women") %>%
  select(Category, Cat_rev)
## # A tibble: 39 × 2
## Category Cat_rev
## <chr> <fct>
## 1 Wheelchair Women Disabled Women
## 2 Wheelchair Women Disabled Women
## 3 Wheelchair Women Disabled Women
## 4 Wheelchair Women Disabled Women
## 5 Wheelchair Women Disabled Women
## 6 Wheelchair Women Disabled Women
## 7 Wheelchair Women Disabled Women
## 8 Wheelchair Women Disabled Women
## 9 Wheelchair Women Disabled Women
## 10 Wheelchair Women Disabled Women
## # ℹ 29 more rows
# fct_collapse(): merge both women's categories into a single "Women" level,
# then show a random sample of 15 rows comparing original and collapsed labels.
winners %>%
  mutate(
    Cat_col = fct_collapse(Category, "Women" = c("Wheelchair Women", "Women"))
  ) %>%
  select(Category, Cat_col) %>%
  # Drop both men's categories so only the collapsed rows remain.
  filter(Category != "Men", Category != "Wheelchair Men") %>%
  # slice_sample() replaces the superseded sample_n(). The sampled rows vary
  # between runs unless a seed is set elsewhere.
  slice_sample(n = 15)
## # A tibble: 15 × 2
## Category Cat_col
## <chr> <fct>
## 1 Women Women
## 2 Wheelchair Women Women
## 3 Women Women
## 4 Women Women
## 5 Wheelchair Women Women
## 6 Women Women
## 7 Women Women
## 8 Women Women
## 9 Wheelchair Women Women
## 10 Wheelchair Women Women
## 11 Wheelchair Women Women
## 12 Women Women
## 13 Wheelchair Women Women
## 14 Women Women
## 15 Wheelchair Women Women
# Number of rows per Nationality.
count(winners, Nationality)
## # A tibble: 24 × 2
## Nationality n
## <chr> <int>
## 1 Australia 3
## 2 Belgium 1
## 3 Canada 4
## 4 China 1
## 5 Denmark 3
## 6 Ethiopia 9
## 7 France 3
## 8 Germany 5
## 9 Ireland 6
## 10 Italy 4
## # ℹ 14 more rows
# fct_lump() is superseded in forcats; with neither `n` nor `prop` supplied it
# behaves as fct_lump_lowfreq(), so call that function directly.
# NOTE(review): the printed result still has 24 distinct values, the same as
# count(Nationality), so the lumping appears to have little or no effect on
# this data. Consider fct_lump_n() or fct_lump_prop() if an "Other" level is
# wanted.
winners %>%
  mutate(nat_lump = fct_lump_lowfreq(Nationality)) %>%
  distinct(nat_lump)
## # A tibble: 24 × 1
## nat_lump
## <fct>
## 1 United States
## 2 Norway
## 3 United Kingdom
## 4 Japan
## 5 Denmark
## 6 Kenya
## 7 Soviet Union
## 8 Portugal
## 9 Mexico
## 10 Spain
## # ℹ 14 more rows
No need to do anything here.