Import your data

# Load the Olympics records from the shared data folder; column types are
# guessed by read_csv().
data <- read_csv(file = "../00_data/Olympics.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, sex, team, noc, games, season, city, sport, event, medal
## dbl  (5): id, age, height, weight, year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tally the rows for each medal value; missing medals show up as their own row.
count(data, medal)
## # A tibble: 4 × 2
##   medal       n
##   <chr>   <int>
## 1 Bronze  13295
## 2 Gold    13372
## 3 Silver  13116
## 4 <NA>   231333
# Medal ranks listed from best to worst; this is the level order for the factor.
medal_levels <- c("Gold", "Silver", "Bronze")

# Copy of `data` in which `medal` is a factor following `medal_levels`
# (rows with a missing medal stay NA).
data_rev <- mutate(data, medal = factor(medal, levels = medal_levels))

Modify factor order

Make two bar charts here: one before ordering and another after.

# Transform data: calculate average age by medal in the olympics
# (rows with a missing age are skipped via na.rm = TRUE; athletes with no
# medal form their own NA group)
age_avg <- data %>%
    group_by(medal) %>%
    summarise(age = mean(age, na.rm = TRUE))

age_avg
## # A tibble: 4 × 2
##   medal    age
##   <chr>  <dbl>
## 1 Bronze  25.9
## 2 Gold    25.9
## 3 Silver  26.0
## 4 <NA>    25.5
# Plot before reordering: medal is drawn in its default order
age_avg %>%
    ggplot(aes(x = age, y = medal)) +
    geom_point()

# Plot after reordering: medal levels are sorted by their average age
age_avg %>%
    ggplot(aes(x = age, y = fct_reorder(medal, age))) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename each medal level to its initial (new name = old name), then tally.
data %>%
    mutate(
        medal = fct_recode(medal, G = "Gold", S = "Silver", B = "Bronze")
    ) %>%
    count(medal)
## # A tibble: 4 × 2
##   medal      n
##   <fct>  <int>
## 1 B      13295
## 2 G      13372
## 3 S      13116
## 4 <NA>  231333
  • fct_collapse
# Keep Gold as "G" and merge Silver and Bronze into a single "Other" level.
# The old levels being merged must be passed together as one character vector;
# written as `Other = "Silver", "Bronze"`, "Bronze" becomes a separate unnamed
# argument and is renamed to an empty-string level instead of joining "Other".
data %>%
    mutate(medal = fct_collapse(medal,
                                G = "Gold",
                                Other = c("Silver", "Bronze"))) %>%
    count(medal)
## # A tibble: 3 × 2
##   medal      n
##   <fct>  <int>
## 1 Other  26411
## 2 G      13372
## 3 <NA>  231333
  • fct_lump
# Called without n/prop, fct_lump() decides what to lump on its own; in the
# output below Silver has been folded into "Other".
data %>%
    mutate(medal = medal %>% fct_lump()) %>%
    count(medal)
## # A tibble: 4 × 2
##   medal       n
##   <fct>   <int>
## 1 Bronze  13295
## 2 Gold    13372
## 3 Other   13116
## 4 <NA>   231333

Chapter 16

No need to do anything here.