data <- read_excel("../00_data/MyData.xlsx")
data
## # A tibble: 900 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
data_long <- data %>%
pivot_longer(cols = c('home_score', 'away_score'),
names_to = "home_or_away",
values_to = "team_score")
data_long
## # A tibble: 1,800 × 15
## year country city stage home_team away_team outcome win_conditions
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1930 Uruguay Montevideo Group 1 France Mexico H <NA>
## 2 1930 Uruguay Montevideo Group 1 France Mexico H <NA>
## 3 1930 Uruguay Montevideo Group 4 Belgium United Sta… A <NA>
## 4 1930 Uruguay Montevideo Group 4 Belgium United Sta… A <NA>
## 5 1930 Uruguay Montevideo Group 2 Brazil Yugoslavia A <NA>
## 6 1930 Uruguay Montevideo Group 2 Brazil Yugoslavia A <NA>
## 7 1930 Uruguay Montevideo Group 3 Peru Romania A <NA>
## 8 1930 Uruguay Montevideo Group 3 Peru Romania A <NA>
## 9 1930 Uruguay Montevideo Group 1 Argentina France H <NA>
## 10 1930 Uruguay Montevideo Group 1 Argentina France H <NA>
## # ℹ 1,790 more rows
## # ℹ 7 more variables: winning_team <chr>, losing_team <chr>, date <dttm>,
## # month <chr>, dayofweek <chr>, home_or_away <chr>, team_score <dbl>
data_wide <- data_long %>%
pivot_wider(names_from = home_or_away,
values_from = team_score)
data_wide
## # A tibble: 900 × 15
## year country city stage home_team away_team outcome win_conditions
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1930 Uruguay Montevideo Group 1 France Mexico H <NA>
## 2 1930 Uruguay Montevideo Group 4 Belgium United Sta… A <NA>
## 3 1930 Uruguay Montevideo Group 2 Brazil Yugoslavia A <NA>
## 4 1930 Uruguay Montevideo Group 3 Peru Romania A <NA>
## 5 1930 Uruguay Montevideo Group 1 Argentina France H <NA>
## 6 1930 Uruguay Montevideo Group 1 Chile Mexico H <NA>
## 7 1930 Uruguay Montevideo Group 2 Bolivia Yugoslavia A <NA>
## 8 1930 Uruguay Montevideo Group 4 Paraguay United Sta… A <NA>
## 9 1930 Uruguay Montevideo Group 3 Uruguay Peru H <NA>
## 10 1930 Uruguay Montevideo Group 1 Argentina Mexico H <NA>
## # ℹ 890 more rows
## # ℹ 7 more variables: winning_team <chr>, losing_team <chr>, date <dttm>,
## # month <chr>, dayofweek <chr>, home_score <dbl>, away_score <dbl>
data_sep <- data %>%
separate(col = date, into = c("year", "month", "day"))
data_sep
## # A tibble: 900 × 15
## country city stage home_team away_team home_score away_score outcome
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Uruguay Montevideo Group 1 France Mexico 4 1 H
## 2 Uruguay Montevideo Group 4 Belgium United St… 0 3 A
## 3 Uruguay Montevideo Group 2 Brazil Yugoslavia 1 2 A
## 4 Uruguay Montevideo Group 3 Peru Romania 1 3 A
## 5 Uruguay Montevideo Group 1 Argentina France 1 0 H
## 6 Uruguay Montevideo Group 1 Chile Mexico 3 0 H
## 7 Uruguay Montevideo Group 2 Bolivia Yugoslavia 0 4 A
## 8 Uruguay Montevideo Group 4 Paraguay United St… 0 3 A
## 9 Uruguay Montevideo Group 3 Uruguay Peru 1 0 H
## 10 Uruguay Montevideo Group 1 Argentina Mexico 6 3 H
## # ℹ 890 more rows
## # ℹ 7 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, year <chr>, month <chr>, day <chr>, dayofweek <chr>
data_unite <- data_sep %>%
unite(col = "date", c(year, month, day), sep = "-")
data_unite
## # A tibble: 900 × 13
## country city stage home_team away_team home_score away_score outcome
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Uruguay Montevideo Group 1 France Mexico 4 1 H
## 2 Uruguay Montevideo Group 4 Belgium United St… 0 3 A
## 3 Uruguay Montevideo Group 2 Brazil Yugoslavia 1 2 A
## 4 Uruguay Montevideo Group 3 Peru Romania 1 3 A
## 5 Uruguay Montevideo Group 1 Argentina France 1 0 H
## 6 Uruguay Montevideo Group 1 Chile Mexico 3 0 H
## 7 Uruguay Montevideo Group 2 Bolivia Yugoslavia 0 4 A
## 8 Uruguay Montevideo Group 4 Paraguay United St… 0 3 A
## 9 Uruguay Montevideo Group 3 Uruguay Peru 1 0 H
## 10 Uruguay Montevideo Group 1 Argentina Mexico 6 3 H
## # ℹ 890 more rows
## # ℹ 5 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <chr>, dayofweek <chr>
data %>%
complete(year, winning_team) %>%
select(year, stage, winning_team, home_team, away_team) %>%
arrange(stage)
## # A tibble: 1,942 × 5
## year stage winning_team home_team away_team
## <dbl> <chr> <chr> <chr> <chr>
## 1 1930 Final Uruguay Uruguay Argentina
## 2 1934 Final Italy Italy Czechoslovakia
## 3 1938 Final Italy Hungary Italy
## 4 1954 Final West Germany West Germany Hungary
## 5 1958 Final Brazil Sweden Brazil
## 6 1962 Final Brazil Brazil Czechoslovakia
## 7 1966 Final England England West Germany
## 8 1970 Final Brazil Brazil Italy
## 9 1974 Final West Germany West Germany Netherlands
## 10 1978 Final Argentina Argentina Netherlands
## # ℹ 1,932 more rows