Import your data

# Read the Excel workbook from the sibling data folder and print a preview.
data <- read_excel(path = "../00_data/MyData.xlsx")
data
## # A tibble: 900 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Chapter 15

Create a factor

# Reference ordering of the day labels, used as the factor levels below.
days <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

# A plain character vector sorts alphabetically.
x <- c("Mon", "Tue", "Tue", "Fri", "Sat", "Sat")
x
## [1] "Mon" "Tue" "Tue" "Fri" "Sat" "Sat"
sort(x)
## [1] "Fri" "Mon" "Sat" "Sat" "Tue" "Tue"

# As a factor with explicit levels, sorting follows the level order instead.
y <- factor(x = x, levels = days)
y
## [1] Mon Tue Tue Fri Sat Sat
## Levels: Mon Tue Wed Thu Fri Sat Sun
sort(y)
## [1] Mon Tue Tue Fri Sat Sat
## Levels: Mon Tue Wed Thu Fri Sat Sun

Modify factor order

Make two bar charts here: one before ordering and another after.

# Average of home_score + away_score over the rows of each dayofweek group.
goal_per_day <- data %>%
  mutate(total_goals = home_score + away_score) %>%
  group_by(dayofweek) %>%
  summarise(avg_goal = mean(total_goals, na.rm = TRUE))
goal_per_day
## # A tibble: 7 × 2
##   dayofweek avg_goal
##   <chr>        <dbl>
## 1 Friday        2.33
## 2 Monday        2.55
## 3 Saturday      2.80
## 4 Sunday        3.43
## 5 Thursday      2.60
## 6 Tuesday       2.70
## 7 Wednesday     2.82
# Baseline dot plot, drawn before any factor reordering is applied.
ggplot(data = goal_per_day, mapping = aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Drop the y-axis title; the day names are self-explanatory.
  labs(x = "Average goals per day", y = NULL)

# Same dot plot, with the days ordered by their average goals.
goal_per_day %>%
  # goal_per_day has one row per day, so this orders the levels by avg_goal.
  mutate(dayofweek = fct_reorder(.f = dayofweek, .x = avg_goal)) %>%
  ggplot(aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Axis label matches the wording used on the unordered plot.
  labs(y = NULL, x = "Average goals per day")

# Ordered dot plot with "Saturday" manually moved to the first level.
goal_per_day %>%
  mutate(
    dayofweek = dayofweek %>%
      # Order the levels by avg_goal (one row per day in goal_per_day) ...
      fct_reorder(.x = avg_goal) %>%
      # ... then pull "Saturday" to the front of the level order.
      fct_relevel("Saturday")
  ) %>%
  ggplot(aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Axis label matches the wording used on the unordered plot.
  labs(y = NULL, x = "Average goals per day")

Modify factor levels

Show examples of three functions:

# List the unique values of dayofweek before modifying its levels.
distinct(data, dayofweek)
## # A tibble: 7 × 1
##   dayofweek
##   <chr>    
## 1 Sunday   
## 2 Monday   
## 3 Tuesday  
## 4 Wednesday
## 5 Thursday 
## 6 Friday   
## 7 Saturday
  • fct_recode
# fct_recode: rename one level ("Wednesday" becomes "Weekday"), then show only
# the rows whose original value was Wednesday.
data %>%
  select(dayofweek) %>%
  mutate(weekday = fct_recode(dayofweek, "Weekday" = "Wednesday")) %>%
  filter(dayofweek == "Wednesday")
## # A tibble: 148 × 2
##    dayofweek weekday
##    <chr>     <fct>  
##  1 Wednesday Weekday
##  2 Wednesday Weekday
##  3 Wednesday Weekday
##  4 Wednesday Weekday
##  5 Wednesday Weekday
##  6 Wednesday Weekday
##  7 Wednesday Weekday
##  8 Wednesday Weekday
##  9 Wednesday Weekday
## 10 Wednesday Weekday
## # ℹ 138 more rows
  • fct_collapse
# fct_collapse: merge Monday through Friday into a single "Weekday" level,
# leaving Saturday and Sunday as their own levels.
data %>%
  mutate(
    weekday = fct_collapse(
      dayofweek,
      "Weekday" = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
    )
  ) %>%
  select(dayofweek, weekday)
## # A tibble: 900 × 2
##    dayofweek weekday 
##    <chr>     <fct>   
##  1 Sunday    Sunday  
##  2 Sunday    Sunday  
##  3 Monday    Weekday 
##  4 Monday    Weekday 
##  5 Tuesday   Weekday 
##  6 Wednesday Weekday 
##  7 Thursday  Weekday 
##  8 Thursday  Weekday 
##  9 Friday    Weekday 
## 10 Saturday  Saturday
## # ℹ 890 more rows
  • fct_lump
# fct_lump_n: keep the 5 most frequent days and lump the rest into one level.
# NOTE(review): the lumped level is labelled "Weekday", but in the printed
# output it holds Monday and Friday (the least frequent days), not every
# weekday -- confirm that this label is intended.
data %>%
  mutate(
    weekday = fct_lump_n(dayofweek, n = 5, other_level = "Weekday")
  ) %>%
  select(dayofweek, weekday)
## # A tibble: 900 × 2
##    dayofweek weekday  
##    <chr>     <fct>    
##  1 Sunday    Sunday   
##  2 Sunday    Sunday   
##  3 Monday    Weekday  
##  4 Monday    Weekday  
##  5 Tuesday   Tuesday  
##  6 Wednesday Wednesday
##  7 Thursday  Thursday 
##  8 Thursday  Thursday 
##  9 Friday    Weekday  
## 10 Saturday  Saturday 
## # ℹ 890 more rows

Chapter 16

No need to do anything here.