# excel filer
games <- read_excel("../00_data/MyData_charts.xlsx")
games
## # A tibble: 988 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
games_short <- games %>%
select( year, home_team, home_score, away_team, away_score, winning_team, date) %>%
filter(year > 2013, home_score > 1, away_score > 1)
games_short
## # A tibble: 15 × 7
## year home_team home_score away_team away_score winning_team
## <dbl> <chr> <dbl> <chr> <dbl> <chr>
## 1 2014 Australia 2 Netherlands 3 Netherlands
## 2 2014 Switzerland 2 France 5 France
## 3 2014 Germany 2 Ghana 2 NA
## 4 2014 South Korea 2 Algeria 4 Algeria
## 5 2014 United States 2 Portugal 2 NA
## 6 2014 Nigeria 2 Argentina 3 Argentina
## 7 2018 Portugal 3 Spain 3 NA
## 8 2018 Belgium 5 Tunisia 2 Belgium
## 9 2018 Japan 2 Senegal 2 NA
## 10 2018 Spain 2 Morocco 2 NA
## 11 2018 Switzerland 2 Costa Rica 2 NA
## 12 2018 France 4 Argentina 3 France
## 13 2018 Belgium 3 Japan 2 Belgium
## 14 2018 Russia 2 Croatia 2 Croatia
## 15 2018 France 4 Croatia 2 France
## # ℹ 1 more variable: date <dttm>
games_long <- games_short %>%
pivot_longer(cols = c(`home_score`, `away_score`),
names_to = "home_or_away",
values_to = "score")
games_long
## # A tibble: 30 × 7
## year home_team away_team winning_team date home_or_away score
## <dbl> <chr> <chr> <chr> <dttm> <chr> <dbl>
## 1 2014 Australia Netherla… Netherlands 2014-06-18 00:00:00 home_score 2
## 2 2014 Australia Netherla… Netherlands 2014-06-18 00:00:00 away_score 3
## 3 2014 Switzerl… France France 2014-06-20 00:00:00 home_score 2
## 4 2014 Switzerl… France France 2014-06-20 00:00:00 away_score 5
## 5 2014 Germany Ghana NA 2014-06-21 00:00:00 home_score 2
## 6 2014 Germany Ghana NA 2014-06-21 00:00:00 away_score 2
## 7 2014 South Ko… Algeria Algeria 2014-06-22 00:00:00 home_score 2
## 8 2014 South Ko… Algeria Algeria 2014-06-22 00:00:00 away_score 4
## 9 2014 United S… Portugal NA 2014-06-22 00:00:00 home_score 2
## 10 2014 United S… Portugal NA 2014-06-22 00:00:00 away_score 2
## # ℹ 20 more rows
games_wide <- games_long %>%
pivot_wider(names_from = home_or_away, values_from = score)
games_wide
## # A tibble: 15 × 7
## year home_team away_team winning_team date home_score
## <dbl> <chr> <chr> <chr> <dttm> <dbl>
## 1 2014 Australia Netherlands Netherlands 2014-06-18 00:00:00 2
## 2 2014 Switzerland France France 2014-06-20 00:00:00 2
## 3 2014 Germany Ghana NA 2014-06-21 00:00:00 2
## 4 2014 South Korea Algeria Algeria 2014-06-22 00:00:00 2
## 5 2014 United States Portugal NA 2014-06-22 00:00:00 2
## 6 2014 Nigeria Argentina Argentina 2014-06-25 00:00:00 2
## 7 2018 Portugal Spain NA 2018-06-15 00:00:00 3
## 8 2018 Belgium Tunisia Belgium 2018-06-23 00:00:00 5
## 9 2018 Japan Senegal NA 2018-06-24 00:00:00 2
## 10 2018 Spain Morocco NA 2018-06-25 00:00:00 2
## 11 2018 Switzerland Costa Rica NA 2018-06-27 00:00:00 2
## 12 2018 France Argentina France 2018-06-30 00:00:00 4
## 13 2018 Belgium Japan Belgium 2018-07-02 00:00:00 3
## 14 2018 Russia Croatia Croatia 2018-07-07 00:00:00 2
## 15 2018 France Croatia France 2018-07-15 00:00:00 4
## # ℹ 1 more variable: away_score <dbl>
games_sep <- games_short %>%
separate(col = date, into = c("year", "month", "day"))
games_sep
## # A tibble: 15 × 8
## home_team home_score away_team away_score winning_team year month day
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 Australia 2 Netherlan… 3 Netherlands 2014 06 18
## 2 Switzerland 2 France 5 France 2014 06 20
## 3 Germany 2 Ghana 2 NA 2014 06 21
## 4 South Korea 2 Algeria 4 Algeria 2014 06 22
## 5 United States 2 Portugal 2 NA 2014 06 22
## 6 Nigeria 2 Argentina 3 Argentina 2014 06 25
## 7 Portugal 3 Spain 3 NA 2018 06 15
## 8 Belgium 5 Tunisia 2 Belgium 2018 06 23
## 9 Japan 2 Senegal 2 NA 2018 06 24
## 10 Spain 2 Morocco 2 NA 2018 06 25
## 11 Switzerland 2 Costa Rica 2 NA 2018 06 27
## 12 France 4 Argentina 3 France 2018 06 30
## 13 Belgium 3 Japan 2 Belgium 2018 07 02
## 14 Russia 2 Croatia 2 Croatia 2018 07 07
## 15 France 4 Croatia 2 France 2018 07 15
games_unite <- games_sep %>%
unite(col = "date", c(year,day), sep = "-")
games_unite
## # A tibble: 15 × 7
## home_team home_score away_team away_score winning_team date month
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 Australia 2 Netherlands 3 Netherlands 2014-18 06
## 2 Switzerland 2 France 5 France 2014-20 06
## 3 Germany 2 Ghana 2 NA 2014-21 06
## 4 South Korea 2 Algeria 4 Algeria 2014-22 06
## 5 United States 2 Portugal 2 NA 2014-22 06
## 6 Nigeria 2 Argentina 3 Argentina 2014-25 06
## 7 Portugal 3 Spain 3 NA 2018-15 06
## 8 Belgium 5 Tunisia 2 Belgium 2018-23 06
## 9 Japan 2 Senegal 2 NA 2018-24 06
## 10 Spain 2 Morocco 2 NA 2018-25 06
## 11 Switzerland 2 Costa Rica 2 NA 2018-27 06
## 12 France 4 Argentina 3 France 2018-30 06
## 13 Belgium 3 Japan 2 Belgium 2018-02 07
## 14 Russia 2 Croatia 2 Croatia 2018-07 07
## 15 France 4 Croatia 2 France 2018-15 07
games_short %>%
complete(year, winning_team) %>%
select(year, winning_team, home_team, away_team) %>%
arrange(desc(year))
## # A tibble: 20 × 4
## year winning_team home_team away_team
## <dbl> <chr> <chr> <chr>
## 1 2018 Algeria <NA> <NA>
## 2 2018 Argentina <NA> <NA>
## 3 2018 Belgium Belgium Tunisia
## 4 2018 Belgium Belgium Japan
## 5 2018 Croatia Russia Croatia
## 6 2018 France France Argentina
## 7 2018 France France Croatia
## 8 2018 NA Portugal Spain
## 9 2018 NA Japan Senegal
## 10 2018 NA Spain Morocco
## 11 2018 NA Switzerland Costa Rica
## 12 2018 Netherlands <NA> <NA>
## 13 2014 Algeria South Korea Algeria
## 14 2014 Argentina Nigeria Argentina
## 15 2014 Belgium <NA> <NA>
## 16 2014 Croatia <NA> <NA>
## 17 2014 France Switzerland France
## 18 2014 NA Germany Ghana
## 19 2014 NA United States Portugal
## 20 2014 Netherlands Australia Netherlands