# Load the match data from CSV; the examples below all read from `soccer`.
soccer <- read_csv(file = "../00_data/myData.csv")
## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl (3): year, home_score, away_score
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Make two bar charts here: one before ordering the factor levels and another after.
# Tabulate how many matches fall under each outcome code.
count(soccer, outcome)
## # A tibble: 3 × 2
## outcome n
## <chr> <int>
## 1 A 302
## 2 D 169
## 3 H 429
# Bar chart of match counts per outcome, with the x-axis in its default order.
soccer %>%
  ggplot(aes(x = outcome)) +
  geom_bar()
# Re-check the per-outcome counts before drawing the second chart.
dplyr::count(soccer, outcome)
## # A tibble: 3 × 2
## outcome n
## <chr> <int>
## 1 A 302
## 2 D 169
## 3 H 429
# Bar chart of match counts per outcome, with the bars ordered by frequency.
# `outcome` is a character column, so it has no level order of its own;
# fct_infreq() turns it into a factor whose levels run from the most to the
# least common value, and ggplot2 lays out the x-axis in that level order.
soccer %>%
  mutate(outcome = fct_infreq(outcome)) %>%
  ggplot(aes(x = outcome)) +
  geom_bar() +
  # Keep every factor level on the axis, even one with no rows.
  scale_x_discrete(drop = FALSE)
Show examples of three functions:
# fct_recode: rename the level "H" to "Home", then show only the rows that
# were affected, with the original and recoded columns side by side.
soccer %>%
  mutate(outcome_rev = fct_recode(outcome, Home = "H")) %>%
  filter(outcome == "H") %>%
  select(outcome, outcome_rev)
## # A tibble: 429 × 2
## outcome outcome_rev
## <chr> <fct>
## 1 H Home
## 2 H Home
## 3 H Home
## 4 H Home
## 5 H Home
## 6 H Home
## 7 H Home
## 8 H Home
## 9 H Home
## 10 H Home
## # … with 419 more rows
# fct_collapse: merge the levels "A" and "D" into a single level named "Away",
# then show only the rows that were affected.
# NOTE(review): if "D" denotes a draw, the label "Away" is misleading for those
# rows; a name such as "Not home" may fit better - confirm against the data.
soccer %>%
  mutate(outcome_col = fct_collapse(outcome, Away = c("A", "D"))) %>%
  filter(outcome != "H") %>%
  select(outcome, outcome_col)
## # A tibble: 471 × 2
## outcome outcome_col
## <chr> <fct>
## 1 A Away
## 2 A Away
## 3 A Away
## 4 A Away
## 5 A Away
## 6 A Away
## 7 A Away
## 8 A Away
## 9 A Away
## 10 A Away
## # … with 461 more rows
# fct_lump: first look at the per-outcome counts that the lumping is based on.
count(soccer, outcome)
## # A tibble: 3 × 2
## outcome n
## <chr> <int>
## 1 A 302
## 2 D 169
## 3 H 429
# Lump the outcome levels with fct_lump() and list the distinct values that
# remain in the new column.
soccer %>%
  mutate(outcome_lump = fct_lump(outcome)) %>%
  select(outcome_lump) %>%
  distinct()
## # A tibble: 3 × 1
## outcome_lump
## <fct>
## 1 H
## 2 A
## 3 Other
No need to do anything here.