# Load the workbook into `data`, then tabulate the number of rows for each
# distinct value of `medal`.
data <- readxl::read_xlsx(path = "myData.xlsx")
count(data, medal)
## # A tibble: 4 × 2
## medal n
## <chr> <int>
## 1 Bronze 13295
## 2 Gold 13372
## 3 NA 231333
## 4 Silver 13116
# Explicit level order to use when `medal` is turned into a factor.
medal_levels <- c("Gold", "Silver", "Bronze")

# Copy of `data` in which `medal` is a factor with the levels above; any value
# that is not one of those levels becomes a missing value.
data_rev <- mutate(data, medal = factor(medal, levels = medal_levels))
# Mean age per medal category, restricted to rows that actually have a medal.
# The `medal` column carries absent medals as the literal text "NA" (it is
# counted and sorted like any other string in the tabulation above), so
# `is.na()` alone lets those rows through. Both real missing values and the
# "NA" text are dropped here.
data_summary <- data %>%
  filter(!is.na(medal), medal != "NA") %>%
  group_by(medal) %>%
  summarise(
    # `age` is coerced to numeric first; entries that cannot be parsed become
    # NA and are then skipped by `na.rm = TRUE`.
    avg_age = mean(as.numeric(age), na.rm = TRUE)
  ) %>%
  # Convert the grouping column to a factor (levels default to sorted order).
  mutate(medal = factor(medal))
data_summary
## # A tibble: 3 × 2
## medal avg_age
## <fct> <dbl>
## 1 Bronze 25.9
## 2 Gold 25.9
## 3 Silver 26.0
# Dot plot of average age per medal, with categories in their stored factor
# order.
ggplot(data = data_summary, mapping = aes(x = avg_age, y = medal)) +
  geom_point()

# Same plot, but with the medal categories ordered by their average age.
ggplot(
  data = data_summary,
  mapping = aes(x = avg_age, y = fct_reorder(medal, avg_age))
) +
  geom_point()
# Relabel each medal value with a longer display name, then count rows per
# relabelled value. Values not mentioned in the recode are left untouched.
data %>%
  mutate(
    medal = medal %>%
      fct_recode(
        "Gold Medal" = "Gold",
        "Silver Medal" = "Silver",
        "Bronze Medal" = "Bronze"
      )
  ) %>%
  count(medal)
## # A tibble: 4 × 2
## medal n
## <fct> <int>
## 1 Bronze Medal 13295
## 2 Gold Medal 13372
## 3 NA 231333
## 4 Silver Medal 13116
# Merge the three medal types into a single "Medalist" level, then count rows
# per resulting level.
data %>%
  mutate(
    medal = medal %>%
      fct_collapse(Medalist = c("Gold", "Silver", "Bronze"))
  ) %>%
  count(medal)
## # A tibble: 2 × 2
## medal n
## <fct> <int>
## 1 Medalist 39783
## 2 NA 231333
# Let fct_lump() (default settings) pool the rarer medal values into an
# "Other" level, then count rows per resulting level.
data %>%
  mutate(medal = medal %>% fct_lump()) %>%
  count(medal)
## # A tibble: 2 × 2
## medal n
## <fct> <int>
## 1 NA 231333
## 2 Other 39783
No need to do anything here.