# Import the Olympics data set; read_csv() guesses the column types
data <- read_csv(file = "../00_data/Olympics.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, sex, team, noc, games, season, city, sport, event, medal
## dbl (5): id, age, height, weight, year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally rows per medal value; rows with a missing medal are counted under NA
count(data, medal)
## # A tibble: 4 × 2
## medal n
## <chr> <int>
## 1 Bronze 13295
## 2 Gold 13372
## 3 Silver 13116
## 4 <NA> 231333
# Explicit level order for the medal factor
medal_levels <- c("Gold", "Silver", "Bronze")

# Copy of the data with medal converted from character to a factor
# whose levels follow medal_levels
data_rev <- data %>%
  mutate(medal = factor(medal, levels = medal_levels))
Make two bar charts here: one before ordering and another after.
# Transform data: calculate the average age for each medal category
# (missing ages are ignored via na.rm = TRUE)
age_avg <- data %>%
  group_by(medal) %>%
  summarise(age = mean(age, na.rm = TRUE))

age_avg
## # A tibble: 4 × 2
## medal age
## <chr> <dbl>
## 1 Bronze 25.9
## 2 Gold 25.9
## 3 Silver 26.0
## 4 <NA> 25.5
# Plot before reordering: medal is plotted as-is on the y axis
ggplot(data = age_avg, mapping = aes(x = age, y = medal)) +
  geom_point()

# Plot after reordering: medal categories are ordered by their average age
ggplot(data = age_avg, mapping = aes(x = age, y = fct_reorder(medal, age))) +
  geom_point()
Show examples of three functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode(): rename each medal level to a one-letter abbreviation
data %>%
  mutate(medal = fct_recode(medal, G = "Gold", S = "Silver", B = "Bronze")) %>%
  count(medal)
## # A tibble: 4 × 2
## medal n
## <fct> <int>
## 1 B 13295
## 2 G 13372
## 3 S 13116
## 4 <NA> 231333
# fct_collapse(): keep Gold as its own level ("G") and merge Silver and
# Bronze into a single "Other" level.
# The levels to merge must be passed together as one character vector;
# writing `Other = "Silver", "Bronze"` hands "Bronze" over as a separate
# unnamed argument, which recodes it to an empty-string level.
data %>%
  mutate(
    medal = fct_collapse(
      medal,
      G = "Gold",
      Other = c("Silver", "Bronze")
    )
  ) %>%
  count(medal)
## # A tibble: 3 × 2
## medal n
## <fct> <int>
## 1 Other 26411
## 2 G 13372
## 3 <NA> 231333
# fct_lump(): with no further arguments, the least frequent medal level
# (Silver in this data) is lumped into "Other"
data %>%
  mutate(medal = fct_lump(medal)) %>%
  count(medal)
## # A tibble: 4 × 2
## medal n
## <fct> <int>
## 1 Bronze 13295
## 2 Gold 13372
## 3 Other 13116
## 4 <NA> 231333
No need to do anything here.