# Load the match-level data set from the project's shared data folder.
data <- read_excel(path = "../00_data/MyData.xlsx")

# Print the tibble to preview its columns and first rows.
data
## # A tibble: 900 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
# A character vector of day abbreviations.
x <- c(
  "Mon", "Tue", "Tue",
  "Fri", "Sat", "Sat"
)
x
## [1] "Mon" "Tue" "Tue" "Fri" "Sat" "Sat"

# Sorting plain strings is alphabetical, which scrambles the weekdays.
sort(x)
## [1] "Fri" "Mon" "Sat" "Sat" "Tue" "Tue"

# Declare the calendar order once and use it as the factor levels.
days <- c(
  "Mon", "Tue", "Wed", "Thu",
  "Fri", "Sat", "Sun"
)
y <- factor(x, levels = days)
y
## [1] Mon Tue Tue Fri Sat Sat
## Levels: Mon Tue Wed Thu Fri Sat Sun

# Sorting a factor follows its level order rather than the alphabet.
sort(y)
## [1] Mon Tue Tue Fri Sat Sat
## Levels: Mon Tue Wed Thu Fri Sat Sun
Make two bar charts here: one before ordering and another after.
# Average number of goals scored per match, broken down by day of the week.
goal_per_day <- data %>%
  # Total goals in each match (home + away).
  mutate(total_goals = home_score + away_score) %>%
  group_by(dayofweek) %>%
  summarise(avg_goal = mean(total_goals, na.rm = TRUE))

goal_per_day
## # A tibble: 7 × 2
## dayofweek avg_goal
## <chr> <dbl>
## 1 Friday 2.33
## 2 Monday 2.55
## 3 Saturday 2.80
## 4 Sunday 3.43
## 5 Thursday 2.60
## 6 Tuesday 2.70
## 7 Wednesday 2.82
# Dot plot of average goals by day. `dayofweek` is a plain character column
# here, so the days appear in alphabetical order on the y axis.
ggplot(goal_per_day, aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Drop the y-axis title and describe the x axis.
  labs(x = "Average goals per day", y = NULL)
# Dot plot of average goals by day, with the days ordered by their average
# instead of alphabetically.
goal_per_day %>%
  # Reorder the day levels by `avg_goal` before plotting, so the aesthetic
  # mapping stays a plain column reference.
  mutate(dayofweek = fct_reorder(.f = dayofweek, .x = avg_goal)) %>%
  ggplot(aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Drop the y-axis title and describe the x axis.
  labs(y = NULL, x = "Average goals per day")
# Dot plot of average goals by day, ordered by average, with Saturday pulled
# out of that ordering and placed first.
goal_per_day %>%
  mutate(
    dayofweek = dayofweek %>%
      # Order the day levels by their average goals ...
      fct_reorder(.x = avg_goal) %>%
      # ... then move "Saturday" to the front of the level order.
      fct_relevel("Saturday")
  ) %>%
  ggplot(aes(x = avg_goal, y = dayofweek)) +
  geom_point() +
  # Drop the y-axis title and describe the x axis.
  labs(y = NULL, x = "Average goals per day")
Show examples of three functions:
# List the unique day-of-week values present in the data.
distinct(data, dayofweek)
## # A tibble: 7 × 1
## dayofweek
## <chr>
## 1 Sunday
## 2 Monday
## 3 Tuesday
## 4 Wednesday
## 5 Thursday
## 6 Friday
## 7 Saturday
# fct_recode(): rename a single level by hand ("Wednesday" becomes "Weekday"),
# then keep only the affected rows to confirm the change.
data %>%
  select(dayofweek) %>%
  mutate(weekday = fct_recode(dayofweek, Weekday = "Wednesday")) %>%
  filter(dayofweek == "Wednesday")
## # A tibble: 148 × 2
## dayofweek weekday
## <chr> <fct>
## 1 Wednesday Weekday
## 2 Wednesday Weekday
## 3 Wednesday Weekday
## 4 Wednesday Weekday
## 5 Wednesday Weekday
## 6 Wednesday Weekday
## 7 Wednesday Weekday
## 8 Wednesday Weekday
## 9 Wednesday Weekday
## 10 Wednesday Weekday
## # ℹ 138 more rows
# fct_collapse(): merge several levels into one. Monday through Friday become
# a single "Weekday" level; Saturday and Sunday are left unchanged.
data %>%
  mutate(
    weekday = fct_collapse(
      dayofweek,
      "Weekday" = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
    )
  ) %>%
  # Show the original and collapsed columns side by side.
  select(dayofweek, weekday)
## # A tibble: 900 × 2
## dayofweek weekday
## <chr> <fct>
## 1 Sunday Sunday
## 2 Sunday Sunday
## 3 Monday Weekday
## 4 Monday Weekday
## 5 Tuesday Weekday
## 6 Wednesday Weekday
## 7 Thursday Weekday
## 8 Thursday Weekday
## 9 Friday Weekday
## 10 Saturday Saturday
## # ℹ 890 more rows
# fct_lump_n(): keep the `n` most frequent levels and pool the rest into
# `other_level`.
# NOTE: lumping is driven by frequency, not by the calendar, so the pooled
# "Weekday" level holds whichever days have the fewest matches (Monday and
# Friday in the printed output), not Monday through Friday as a group.
data %>%
  mutate(weekday = fct_lump_n(dayofweek, n = 5, other_level = "Weekday")) %>%
  # Show the original and lumped columns side by side.
  select(dayofweek, weekday)
## # A tibble: 900 × 2
## dayofweek weekday
## <chr> <fct>
## 1 Sunday Sunday
## 2 Sunday Sunday
## 3 Monday Weekday
## 4 Monday Weekday
## 5 Tuesday Tuesday
## 6 Wednesday Wednesday
## 7 Thursday Thursday
## 8 Thursday Thursday
## 9 Friday Weekday
## 10 Saturday Saturday
## # ℹ 890 more rows
No need to do anything here.