# Load the match data from the Excel workbook and preview it
games <- "../00_data/MyData_charts.xlsx" %>%
  read_excel()
games
## # A tibble: 988 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
# A plain character vector of day abbreviations sorts alphabetically
x1 <- rep(c("Sun", "Mon", "Tue", "Wed"), times = c(2, 2, 1, 1))
sort(x1)
## [1] "Mon" "Mon" "Sun" "Sun" "Tue" "Wed"
# Calendar order of the days, used as the factor levels
day_levels <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
# As a factor with explicit levels the values print in their original order ...
y1 <- factor(x = x1, levels = day_levels)
y1
## [1] Sun Sun Mon Mon Tue Wed
## Levels: Mon Tue Wed Thu Fri Sat Sun
# ... but sort by level (calendar) order instead of alphabetically
sort(y1)
## [1] Mon Mon Tue Wed Sun Sun
## Levels: Mon Tue Wed Thu Fri Sat Sun
Make two bar charts here: one before ordering, another after.

#### Unordered
# Mean home-team goals for each day of the week. Missing home_score values
# are ignored; games with a missing dayofweek form their own group.
homegoal_summary <- summarise(
  group_by(games, dayofweek),
  avg_goals = mean(home_score, na.rm = TRUE)
)
homegoal_summary
## # A tibble: 8 × 2
## dayofweek avg_goals
## <chr> <dbl>
## 1 Friday 1.29
## 2 Monday 1.65
## 3 Saturday 1.55
## 4 Sunday 1.80
## 5 Thursday 1.39
## 6 Tuesday 1.57
## 7 Wednesday 1.54
## 8 <NA> 4.16
# Unordered bar chart of the mean home goals per day of the week.
# The exercise asks for bar charts, so geom_col() is used (bar length =
# avg_goals). dayofweek is a character column, so the bars appear in
# alphabetical order rather than by value or calendar order.
# NOTE(review): homegoal_summary contains an NA dayofweek row, which is drawn
# as its own "NA" bar -- confirm whether it should be dropped before plotting.
homegoal_summary %>%
  ggplot(aes(x = avg_goals, y = dayofweek)) +
  geom_col() +
  # Labeling
  labs(y = NULL, x = "Mean goals each week day")
# Reorder: the same bar chart, with the days sorted by their mean home goals.
# fct_reorder() turns dayofweek into a factor whose levels follow avg_goals;
# geom_col() draws bars, as the exercise requests, instead of points.
homegoal_summary %>%
  ggplot(aes(
    x = avg_goals,
    y = fct_reorder(.f = dayofweek, .x = avg_goals)
  )) +
  geom_col() +
  # Labeling
  labs(y = NULL, x = "Mean goals each week day")
# Relevel: order the days by mean goals, then move "Sunday" to the front of
# the level order before plotting
homegoal_summary %>%
  ggplot(
    aes(
      x = avg_goals,
      y = fct_relevel(
        fct_reorder(.f = dayofweek, .x = avg_goals),
        "Sunday"
      )
    )
  ) +
  geom_point() +
  # Labeling
  labs(y = NULL, x = "Mean goals each week day")
# Recode: first list the distinct values of dayofweek
distinct(games, dayofweek)
## # A tibble: 8 × 1
## dayofweek
## <chr>
## 1 Sunday
## 2 Monday
## 3 Tuesday
## 4 Wednesday
## 5 Thursday
## 6 Friday
## 7 Saturday
## 8 <NA>
# Rename the "Saturday" level to "Weekend" in a new factor column, then show
# only the Saturday games to check the recode
games %>%
  select(dayofweek) %>%
  mutate(weekend = fct_recode(dayofweek, Weekend = "Saturday")) %>%
  filter(dayofweek == "Saturday")
## # A tibble: 152 × 2
## dayofweek weekend
## <chr> <fct>
## 1 Saturday Weekend
## 2 Saturday Weekend
## 3 Saturday Weekend
## 4 Saturday Weekend
## 5 Saturday Weekend
## 6 Saturday Weekend
## 7 Saturday Weekend
## 8 Saturday Weekend
## 9 Saturday Weekend
## 10 Saturday Weekend
## # ℹ 142 more rows
# Collapse multiple levels into one: "Saturday" and "Sunday" both become
# "Weekend"; Monday games are filtered out of the preview
games %>%
  select(dayofweek) %>%
  mutate(
    weekday = fct_collapse(dayofweek, Weekend = c("Saturday", "Sunday"))
  ) %>%
  filter(dayofweek != "Monday")
## # A tibble: 818 × 2
## dayofweek weekday
## <chr> <fct>
## 1 Sunday Weekend
## 2 Sunday Weekend
## 3 Tuesday Tuesday
## 4 Wednesday Wednesday
## 5 Thursday Thursday
## 6 Thursday Thursday
## 7 Friday Friday
## 8 Saturday Weekend
## 9 Saturday Weekend
## 10 Sunday Weekend
## # ℹ 808 more rows
# Lump small levels into "Other": first count the games played on each day
count(games, dayofweek)
## # A tibble: 8 × 2
## dayofweek n
## <chr> <int>
## 1 Friday 92
## 2 Monday 82
## 3 Saturday 152
## 4 Sunday 196
## 5 Thursday 111
## 6 Tuesday 119
## 7 Wednesday 148
## 8 <NA> 88
# Lump the least frequent days into "Other" and list the resulting levels.
# fct_lump() called without `n` or `prop` delegates to fct_lump_lowfreq();
# the forcats documentation recommends calling the explicit variant in new
# code, and the result is unchanged.
games %>%
  mutate(day_lump = fct_lump_lowfreq(dayofweek)) %>%
  distinct(day_lump)
## # A tibble: 8 × 1
## day_lump
## <fct>
## 1 Sunday
## 2 Other
## 3 Tuesday
## 4 Wednesday
## 5 Thursday
## 6 Friday
## 7 Saturday
## 8 <NA>
No need to do anything here.