# Read the `wcmatches.csv` file from the TidyTuesday 2022-11-29 release
# straight from GitHub into a tibble.
Data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-29/wcmatches.csv"
)
## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl (3): year, home_score, away_score
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Weekday abbreviations held as a plain character vector.
x1 <- rep(c("Sun", "Mon", "Tue", "Wed"), times = c(2, 2, 1, 1))
# Sorting character data orders it alphabetically, not by day of the week.
sort(x1)
## [1] "Mon" "Mon" "Sun" "Sun" "Tue" "Wed"
# The level order we actually want, with the week starting on Monday.
day_levels <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
# Turn the character vector into a factor whose levels follow `day_levels`.
y1 <- factor(x = x1, levels = day_levels)
print(y1)
## [1] Sun Sun Mon Mon Tue Wed
## Levels: Mon Tue Wed Thu Fri Sat Sun
# Sorting the factor now follows the level order instead of the alphabet.
sort(y1)
## [1] Mon Mon Tue Wed Sun Sun
## Levels: Mon Tue Wed Thu Fri Sat Sun
Make two bar charts here: one before ordering and another after.
# Mean of `home_score` for each value of `dayofweek`, ignoring missing scores.
# NOTE(review): relies on dplyr and the pipe being attached earlier
# (e.g. via library(tidyverse)) -- confirm.
homegoal_summary <- Data %>%
  group_by(dayofweek) %>%
  summarise(avg_goals = mean(home_score, na.rm = TRUE))
print(homegoal_summary)
## # A tibble: 7 × 2
## dayofweek avg_goals
## <chr> <dbl>
## 1 Friday 1.29
## 2 Monday 1.65
## 3 Saturday 1.55
## 4 Sunday 1.80
## 5 Thursday 1.39
## 6 Tuesday 1.57
## 7 Wednesday 1.54
# Dot plot of the per-day averages; the weekday axis is not reordered yet.
ggplot(
  data = homegoal_summary,
  mapping = aes(x = avg_goals, y = dayofweek)
) +
  geom_point() +
  # Labeling
  labs(x = "Mean goals each week day", y = NULL)
Show examples of three functions:
# Reorder
# Same dot plot, but the weekday levels are ordered by their `avg_goals` value.
ggplot(
  data = homegoal_summary,
  mapping = aes(
    x = avg_goals,
    y = fct_reorder(.f = dayofweek, .x = avg_goals)
  )
) +
  geom_point() +
  # Labeling
  labs(x = "Mean goals each week day", y = NULL)
# Relevel
# Order the weekday levels by `avg_goals`, then manually move "Sunday" with
# fct_relevel() (by default it is placed first in the level order).
ggplot(
  data = homegoal_summary,
  mapping = aes(
    x = avg_goals,
    y = fct_relevel(fct_reorder(.f = dayofweek, .x = avg_goals), "Sunday")
  )
) +
  geom_point() +
  # Labeling
  labs(x = "Mean goals each week day", y = NULL)
# Recode
# List the distinct `dayofweek` values present in the data.
distinct(Data, dayofweek)
## # A tibble: 7 × 1
## dayofweek
## <chr>
## 1 Sunday
## 2 Monday
## 3 Tuesday
## 4 Wednesday
## 5 Thursday
## 6 Friday
## 7 Saturday
# Relabel the "Saturday" level as "Weekend" in a new factor column, then show
# only the Saturday rows next to their recoded value.
Data %>%
  mutate(weekend = fct_recode(dayofweek, Weekend = "Saturday")) %>%
  filter(dayofweek == "Saturday") %>%
  select(dayofweek, weekend)
## # A tibble: 152 × 2
## dayofweek weekend
## <chr> <fct>
## 1 Saturday Weekend
## 2 Saturday Weekend
## 3 Saturday Weekend
## 4 Saturday Weekend
## 5 Saturday Weekend
## 6 Saturday Weekend
## 7 Saturday Weekend
## 8 Saturday Weekend
## 9 Saturday Weekend
## 10 Saturday Weekend
## # ℹ 142 more rows
# Collapse multiple levels into one
# "Saturday" and "Sunday" are merged into a single "Weekend" level; the
# Monday rows are dropped from the displayed result.
Data %>%
  mutate(
    weekday = fct_collapse(dayofweek, Weekend = c("Saturday", "Sunday"))
  ) %>%
  filter(dayofweek != "Monday") %>%
  select(dayofweek, weekday)
## # A tibble: 818 × 2
## dayofweek weekday
## <chr> <fct>
## 1 Sunday Weekend
## 2 Sunday Weekend
## 3 Tuesday Tuesday
## 4 Wednesday Wednesday
## 5 Thursday Thursday
## 6 Thursday Thursday
## 7 Friday Friday
## 8 Saturday Weekend
## 9 Saturday Weekend
## 10 Sunday Weekend
## # ℹ 808 more rows
# Lump sum levels into other levels
# Number of rows for each `dayofweek` value, to see which levels are rare.
count(Data, dayofweek)
## # A tibble: 7 × 2
## dayofweek n
## <chr> <int>
## 1 Friday 92
## 2 Monday 82
## 3 Saturday 152
## 4 Sunday 196
## 5 Thursday 111
## 6 Tuesday 119
## 7 Wednesday 148
# Lump the least frequent `dayofweek` levels into "Other" and list the
# resulting levels. `fct_lump()` is the superseded generic wrapper in forcats;
# when neither `n` nor `prop` is supplied it dispatches to
# `fct_lump_lowfreq()`, so calling that directly keeps the result unchanged
# while avoiding the legacy function.
Data %>%
  mutate(day_lump = fct_lump_lowfreq(dayofweek)) %>%
  distinct(day_lump)
## # A tibble: 7 × 1
## day_lump
## <fct>
## 1 Sunday
## 2 Other
## 3 Tuesday
## 4 Wednesday
## 5 Thursday
## 6 Friday
## 7 Saturday
No need to do anything here.