Import your data

# Load the CSV from the sibling data folder into a tibble.
MyData <- read_csv(file = "../00_data/MyData.csv")
## New names:
## Rows: 380 Columns: 23
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): Date, HomeTeam, AwayTeam, FTR, HTR, Referee dbl (17): ...1, FTHG, FTAG,
## HTHG, HTAG, HS, AS, HST, AST, HF, AF, HC, AC, HY...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Chapter 15

Create a factor

Modify factor order

Make two charts here (dot plots in this case): one before ordering and another after.

# One row per HomeTeam holding the mean of HC (missing values ignored),
# stored in the avg_corners column.
Corners_HT <- summarise(
    group_by(MyData, HomeTeam),
    avg_corners = mean(HC, na.rm = TRUE)
)

# Print the summary table.
Corners_HT
## # A tibble: 20 × 2
##    HomeTeam       avg_corners
##    <chr>                <dbl>
##  1 Arsenal               6.68
##  2 Aston Villa           4.74
##  3 Brentford             4.63
##  4 Brighton              6.21
##  5 Burnley               5.37
##  6 Chelsea               7.26
##  7 Crystal Palace        4.79
##  8 Everton               4.89
##  9 Leeds                 5.05
## 10 Leicester             4.89
## 11 Liverpool             8   
## 12 Man City              8.95
## 13 Man United            5   
## 14 Newcastle             4.05
## 15 Norwich               5.26
## 16 Southampton           6.05
## 17 Tottenham             5.58
## 18 Watford               4.42
## 19 West Ham              5.58
## 20 Wolves                4.63
# "Before" chart: HomeTeam is still a character column, so the y axis uses
# ggplot2's default ordering of the team names.
ggplot(
    data = Corners_HT,
    mapping = aes(x = avg_corners, y = HomeTeam)
) +
    geom_point()

# "After" chart: turn HomeTeam into a factor whose levels are ordered by
# avg_corners, so the teams appear sorted by their average on the y axis.
Corners_HT %>%
    mutate(HomeTeam = fct_reorder(HomeTeam, avg_corners)) %>%
    ggplot(aes(x = avg_corners, y = HomeTeam)) +
    geom_point() +
    # The team names are self-explanatory, so the y-axis title is dropped.
    labs(x = "Average Corners per Home Team", y = NULL)

Modify factor levels

Show examples of three functions:

fct_recode()

# List the unique values that occur in FTR before recoding them.
distinct(MyData, FTR)
## # A tibble: 3 × 1
##   FTR  
##   <chr>
## 1 H    
## 2 A    
## 3 D
# Relabel the FTR codes (H/A/D) as Home/Away/Draw in a new factor column,
# then show the original and recoded columns side by side for the "H" rows.
MyData %>%
    mutate(
        FTR_rev = fct_recode(FTR, Home = "H", Away = "A", Draw = "D")
    ) %>%
    filter(FTR == "H") %>%
    select(FTR, FTR_rev)
## # A tibble: 163 × 2
##    FTR   FTR_rev
##    <chr> <fct>  
##  1 H     Home   
##  2 H     Home   
##  3 H     Home   
##  4 H     Home   
##  5 H     Home   
##  6 H     Home   
##  7 H     Home   
##  8 H     Home   
##  9 H     Home   
## 10 H     Home   
## # ℹ 153 more rows

fct_collapse()

# Merge the "H" and "A" codes into a single "Win" level in a new factor
# column, then show the rows whose original code is not "D".
MyData %>%
    mutate(
        FTR_col = fct_collapse(FTR, Win = c("H", "A"))
    ) %>%
    filter(FTR != "D") %>%
    select(FTR, FTR_col)
## # A tibble: 292 × 2
##    FTR   FTR_col
##    <chr> <fct>  
##  1 H     Win    
##  2 H     Win    
##  3 A     Win    
##  4 H     Win    
##  5 H     Win    
##  6 H     Win    
##  7 H     Win    
##  8 A     Win    
##  9 A     Win    
## 10 H     Win    
## # ℹ 282 more rows

fct_lump()

# Tally how many rows fall under each FTR value.
count(MyData, FTR)
## # A tibble: 3 × 2
##   FTR       n
##   <chr> <int>
## 1 A       129
## 2 D        88
## 3 H       163
# Lump the least frequent FTR values into "Other" and list the resulting
# levels. fct_lump() with neither `n` nor `prop` dispatches to
# fct_lump_lowfreq(); the specific function is called directly because
# forcats keeps fct_lump() mainly for historical reasons and recommends the
# explicit variants in new code.
MyData %>%
    mutate(FTR_lump = fct_lump_lowfreq(FTR)) %>%
    distinct(FTR_lump)
## # A tibble: 3 × 1
##   FTR_lump
##   <fct>   
## 1 H       
## 2 A       
## 3 Other

Chapter 16

No need to do anything here.