# Read the CSV stored under ../00_data into a tibble named MyData.
MyData <- read_csv(file = "../00_data/MyData.csv")
## New names:
## Rows: 380 Columns: 23
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): Date, HomeTeam, AwayTeam, FTR, HTR, Referee dbl (17): ...1, FTHG, FTAG,
## HTHG, HTAG, HS, AS, HST, AST, HF, AF, HC, AC, HY...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Make two bar charts here: one before ordering and another after.
# Mean of HC for each HomeTeam, skipping missing values; one row per team.
Corners_HT <- MyData %>%
  group_by(HomeTeam) %>%
  summarise(avg_corners = mean(HC, na.rm = TRUE), .groups = "drop")

Corners_HT
## # A tibble: 20 × 2
## HomeTeam avg_corners
## <chr> <dbl>
## 1 Arsenal 6.68
## 2 Aston Villa 4.74
## 3 Brentford 4.63
## 4 Brighton 6.21
## 5 Burnley 5.37
## 6 Chelsea 7.26
## 7 Crystal Palace 4.79
## 8 Everton 4.89
## 9 Leeds 5.05
## 10 Leicester 4.89
## 11 Liverpool 8
## 12 Man City 8.95
## 13 Man United 5
## 14 Newcastle 4.05
## 15 Norwich 5.26
## 16 Southampton 6.05
## 17 Tottenham 5.58
## 18 Watford 4.42
## 19 West Ham 5.58
## 20 Wolves 4.63
# Dot plot of the per-team averages, with teams left in their default order.
ggplot(Corners_HT, aes(x = avg_corners, y = HomeTeam)) +
  geom_point()
# Same dot plot, but with teams sorted by their average so the ranking
# can be read straight off the y axis.
Corners_HT %>%
  mutate(HomeTeam = fct_reorder(HomeTeam, avg_corners)) %>%
  ggplot(aes(x = avg_corners, y = HomeTeam)) +
  geom_point() +
  labs(x = "Average Corners per Home Team", y = NULL)
Show examples of three functions:
# List the distinct values that occur in FTR.
distinct(MyData, FTR)
## # A tibble: 3 × 1
## FTR
## <chr>
## 1 H
## 2 A
## 3 D
# fct_recode(): relabel the result codes with readable names, then show the
# original and recoded values side by side for rows where FTR is "H".
MyData %>%
  select(FTR) %>%
  mutate(FTR_rev = fct_recode(FTR, Home = "H", Away = "A", Draw = "D")) %>%
  filter(FTR == "H")
## # A tibble: 163 × 2
## FTR FTR_rev
## <chr> <fct>
## 1 H Home
## 2 H Home
## 3 H Home
## 4 H Home
## 5 H Home
## 6 H Home
## 7 H Home
## 8 H Home
## 9 H Home
## 10 H Home
## # ℹ 153 more rows
# fct_collapse(): merge the "H" and "A" codes into a single "Win" level, then
# show the original and collapsed values for every row that is not "D".
MyData %>%
  select(FTR) %>%
  mutate(FTR_col = fct_collapse(FTR, Win = c("H", "A"))) %>%
  filter(FTR != "D")
## # A tibble: 292 × 2
## FTR FTR_col
## <chr> <fct>
## 1 H Win
## 2 H Win
## 3 A Win
## 4 H Win
## 5 H Win
## 6 H Win
## 7 H Win
## 8 A Win
## 9 A Win
## 10 H Win
## # ℹ 282 more rows
# Number of rows for each FTR value.
count(MyData, FTR)
## # A tibble: 3 × 2
## FTR n
## <chr> <int>
## 1 A 129
## 2 D 88
## 3 H 163
# fct_lump_lowfreq(): fold the least frequent FTR levels into "Other".
# fct_lump() called without `n` or `prop` dispatches to this function, so
# calling it directly makes the lumping rule explicit without changing the
# result.
MyData %>%
  mutate(FTR_lump = fct_lump_lowfreq(FTR)) %>%
  distinct(FTR_lump)
## # A tibble: 3 × 1
## FTR_lump
## <fct>
## 1 H
## 2 A
## 3 Other
No need to do anything here.