Import your data

# Read wcmatches.csv from the TidyTuesday 2022-11-29 folder on GitHub.
Data <- readr::read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-29/wcmatches.csv"
)
## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl   (3): year, home_score, away_score
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# A few weekday abbreviations stored as a plain character vector.
x1 <- c("Sun", "Sun", "Mon", "Mon", "Tue", "Wed")

# Sorting the character vector orders it alphabetically.
sort(x1)
## [1] "Mon" "Mon" "Sun" "Sun" "Tue" "Wed"
# Days of the week, Monday first; used below as the factor levels.
day_levels <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
y1 <- factor(x = x1, levels = day_levels)
y1
## [1] Sun Sun Mon Mon Tue Wed
## Levels: Mon Tue Wed Thu Fri Sat Sun
# Sorting the factor follows the level order rather than the alphabet.
sort(y1)
## [1] Mon Mon Tue Wed Sun Sun
## Levels: Mon Tue Wed Thu Fri Sat Sun

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean of home_score for each value of dayofweek, ignoring NA scores.
homegoal_summary <- Data %>%
    group_by(dayofweek) %>%
    summarise(avg_goals = mean(x = home_score, na.rm = TRUE))
homegoal_summary
## # A tibble: 7 × 2
##   dayofweek avg_goals
##   <chr>         <dbl>
## 1 Friday         1.29
## 2 Monday         1.65
## 3 Saturday       1.55
## 4 Sunday         1.80
## 5 Thursday       1.39
## 6 Tuesday        1.57
## 7 Wednesday      1.54
# Point plot of the averages with dayofweek mapped to the y axis as-is
# (no reordering applied yet).
homegoal_summary %>%
    ggplot(mapping = aes(y = dayofweek, x = avg_goals)) +
    geom_point() +
    # Axis titles: no title on y, descriptive title on x
    labs(x = "Mean goals each week day", y = NULL)

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump
# Reorder: arrange the days on the y axis by their avg_goals value
homegoal_summary %>%
    ggplot(
        mapping = aes(
            x = avg_goals,
            y = fct_reorder(.f = dayofweek, .x = avg_goals)
        )
    ) +
    geom_point() +
    # Axis titles: no title on y, descriptive title on x
    labs(x = "Mean goals each week day", y = NULL)

# Relevel: reorder the days by avg_goals as before, then pass the result to
# fct_relevel() to move the "Sunday" level
homegoal_summary %>%
    ggplot(
        mapping = aes(
            x = avg_goals,
            y = fct_relevel(
                fct_reorder(.f = dayofweek, .x = avg_goals),
                "Sunday"
            )
        )
    ) +
    geom_point() +
    # Axis titles: no title on y, descriptive title on x
    labs(x = "Mean goals each week day", y = NULL)

# Recode
# List the distinct values found in dayofweek first
Data %>%
    distinct(dayofweek)
## # A tibble: 7 × 1
##   dayofweek
##   <chr>    
## 1 Sunday   
## 2 Monday   
## 3 Tuesday  
## 4 Wednesday
## 5 Thursday 
## 6 Friday   
## 7 Saturday
# Rename the "Saturday" level to "Weekend" in a new factor column, then show
# the Saturday rows next to the original dayofweek values
Data %>%
    mutate(weekend = fct_recode(.f = dayofweek, Weekend = "Saturday")) %>%
    filter(dayofweek == "Saturday") %>%
    select(dayofweek, weekend)
## # A tibble: 152 × 2
##    dayofweek weekend
##    <chr>     <fct>  
##  1 Saturday  Weekend
##  2 Saturday  Weekend
##  3 Saturday  Weekend
##  4 Saturday  Weekend
##  5 Saturday  Weekend
##  6 Saturday  Weekend
##  7 Saturday  Weekend
##  8 Saturday  Weekend
##  9 Saturday  Weekend
## 10 Saturday  Weekend
## # ℹ 142 more rows
# Collapse multiple levels into one: "Saturday" and "Sunday" become "Weekend"
Data %>%
    mutate(
        weekday = fct_collapse(
            .f = dayofweek,
            Weekend = c("Saturday", "Sunday")
        )
    ) %>%
    # Leave the Monday rows out of the printed result
    filter(dayofweek != "Monday") %>%
    select(dayofweek, weekday)
## # A tibble: 818 × 2
##    dayofweek weekday  
##    <chr>     <fct>    
##  1 Sunday    Weekend  
##  2 Sunday    Weekend  
##  3 Tuesday   Tuesday  
##  4 Wednesday Wednesday
##  5 Thursday  Thursday 
##  6 Thursday  Thursday 
##  7 Friday    Friday   
##  8 Saturday  Weekend  
##  9 Saturday  Weekend  
## 10 Sunday    Weekend  
## # ℹ 808 more rows
# Lump levels together into an "Other" level
# Start by counting the rows for each day
Data %>%
    count(dayofweek)
## # A tibble: 7 × 2
##   dayofweek     n
##   <chr>     <int>
## 1 Friday       92
## 2 Monday       82
## 3 Saturday    152
## 4 Sunday      196
## 5 Thursday    111
## 6 Tuesday     119
## 7 Wednesday   148
# fct_lump() called without n or prop; list the levels that remain afterwards
Data %>%
    mutate(day_lump = fct_lump(f = dayofweek)) %>%
    distinct(day_lump)
## # A tibble: 7 × 1
##   day_lump 
##   <fct>    
## 1 Sunday   
## 2 Other    
## 3 Tuesday  
## 4 Wednesday
## 5 Thursday 
## 6 Friday   
## 7 Saturday

Chapter 16

No need to do anything here.