Import your data

# Read the CSV from the shared data folder into the `soccer` tibble.
soccer <- read_csv(file = "../00_data/myData.csv")
## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl   (3): year, home_score, away_score
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering and another after.

# Tally the rows for each value of `outcome` (still a character column).
count(soccer, outcome)
## # A tibble: 3 × 2
##   outcome     n
##   <chr>   <int>
## 1 A         302
## 2 D         169
## 3 H         429
# Before ordering: bar chart of `outcome` with the default axis order.
soccer %>%
    ggplot(mapping = aes(x = outcome)) +
    geom_bar()

# Same tally again, shown next to the second chart for reference.
count(soccer, outcome)
## # A tibble: 3 × 2
##   outcome     n
##   <chr>   <int>
## 1 A         302
## 2 D         169
## 3 H         429
# After ordering: `outcome` is read in as a character column, so it is first
# turned into a factor and its levels are then reordered by frequency
# (most common first) with fct_infreq(). Without this step the chart is
# identical to the "before" chart, because scale_x_discrete(drop = FALSE)
# only controls whether unused levels are kept, not the order of the bars.
soccer %>%
    mutate(outcome = fct_infreq(factor(outcome))) %>%
    ggplot(aes(outcome)) +
    geom_bar() +
    # Keep every factor level on the axis, even one with no rows.
    scale_x_discrete(drop = FALSE)

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump
# fct_recode: rename the level "H" to "Home" and leave the other levels
# untouched. Only the rows where outcome is "H" are displayed.
soccer %>%
    select(outcome) %>%
    mutate(outcome_rev = fct_recode(outcome, Home = "H")) %>%
    filter(outcome == "H")
## # A tibble: 429 × 2
##    outcome outcome_rev
##    <chr>   <fct>      
##  1 H       Home       
##  2 H       Home       
##  3 H       Home       
##  4 H       Home       
##  5 H       Home       
##  6 H       Home       
##  7 H       Home       
##  8 H       Home       
##  9 H       Home       
## 10 H       Home       
## # … with 419 more rows
# fct_collapse: merge the levels "A" and "D" into a single level "Away".
# Only the rows where outcome is not "H" are displayed.
# NOTE(review): "Away" also absorbs "D" (presumably draws) -- confirm that
# this label is intended.
soccer %>%
    select(outcome) %>%
    mutate(outcome_col = fct_collapse(outcome, Away = c("A", "D"))) %>%
    filter(outcome != "H")
## # A tibble: 471 × 2
##    outcome outcome_col
##    <chr>   <fct>      
##  1 A       Away       
##  2 A       Away       
##  3 A       Away       
##  4 A       Away       
##  5 A       Away       
##  6 A       Away       
##  7 A       Away       
##  8 A       Away       
##  9 A       Away       
## 10 A       Away       
## # … with 461 more rows
# fct_lump: first look at how many rows each outcome has.
count(soccer, outcome)
## # A tibble: 3 × 2
##   outcome     n
##   <chr>   <int>
## 1 A         302
## 2 D         169
## 3 H         429
# Lump `outcome` with fct_lump() defaults, then list the distinct levels
# that remain after lumping.
soccer %>%
    mutate(outcome_lump = fct_lump(outcome)) %>%
    distinct(outcome_lump)
## # A tibble: 3 × 1
##   outcome_lump
##   <fct>       
## 1 H           
## 2 A           
## 3 Other

Chapter 16

No need to do anything here.