Import your data

# Read the workbook that every later step works from.
# NOTE(review): medal appears to hold the literal text "NA" rather than real
# missing values (presumably age does too) -- consider na = "NA"; confirm.
data <- readxl::read_xlsx(path = "myData.xlsx")

Chapter 15

Create a factor

# Tally the number of rows for each distinct value of medal.
count(data, medal)
## # A tibble: 4 × 2
##   medal       n
##   <chr>   <int>
## 1 Bronze  13295
## 2 Gold    13372
## 3 NA     231333
## 4 Silver  13116
# Explicit level order to use for the medal factor.
medal_levels <- c("Gold", "Silver", "Bronze")

# Copy of data with medal stored as a factor using those levels. Any value
# that is not one of the listed levels becomes NA in the factor.
data_rev <- mutate(data, medal = factor(medal, levels = medal_levels))

Modify factor order

# Average age for each medal type.
# Start from data_rev rather than data: there medal is a factor with levels
# Gold, Silver, Bronze, and every other value is a real NA. That makes the
# is.na() filter actually drop the non-medal rows and keeps the intended
# level order (no alphabetical re-levelling via factor() afterwards).
data_summary <- data_rev %>%
  filter(!is.na(medal)) %>%
  group_by(medal) %>%
  summarise(
    # presumably age can be stored as text; as.numeric() coerces it and
    # na.rm = TRUE skips values that cannot be converted
    avg_age = mean(as.numeric(age), na.rm = TRUE)
  )


data_summary
## # A tibble: 3 × 2
##   medal  avg_age
##   <fct>    <dbl>
## 1 Gold      25.9
## 2 Silver    26.0
## 3 Bronze    25.9
# Average age per medal, with medal shown in its current factor-level order.
ggplot(data = data_summary, mapping = aes(x = avg_age, y = medal)) +
  geom_point()

# Same plot, with the medals on the y-axis reordered by their avg_age.
ggplot(
  data = data_summary,
  mapping = aes(x = avg_age, y = fct_reorder(medal, avg_age))
) +
  geom_point()

Modify factor levels

  • fct_recode
# Rename each medal level to a longer label, then tally rows per label.
# fct_recode() takes "new name" = "old name" pairs; levels that are not
# mentioned are left as they are.
data %>%
  mutate(
    medal = medal %>%
      fct_recode(
        "Gold Medal"   = "Gold",
        "Silver Medal" = "Silver",
        "Bronze Medal" = "Bronze"
      )
  ) %>%
  count(medal)
## # A tibble: 4 × 2
##   medal             n
##   <fct>         <int>
## 1 Bronze Medal  13295
## 2 Gold Medal    13372
## 3 NA           231333
## 4 Silver Medal  13116
  • fct_collapse
# Merge the three medal levels into a single "Medalist" level, then tally
# rows per resulting level.
data %>%
  mutate(
    medal = medal %>%
      fct_collapse(Medalist = c("Gold", "Silver", "Bronze"))
  ) %>%
  count(medal)
## # A tibble: 2 × 2
##   medal         n
##   <fct>     <int>
## 1 Medalist  39783
## 2 NA       231333
  • fct_lump
# Lump the least frequent medal levels into "Other", then tally rows.
# Called without n or prop, fct_lump() lumps the rarest levels together
# while keeping "Other" the smallest level.
data %>%
  mutate(
    medal = medal %>% fct_lump()
  ) %>%
  count(medal)
## # A tibble: 2 × 2
##   medal      n
##   <fct>  <int>
## 1 NA    231333
## 2 Other  39783

Chapter 16

No need to do anything here.