# excel file
data <- read_excel("../00_data/MyData.xlsx")
data
## # A tibble: 900 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
filter(data, stage == "Final")
## # A tibble: 20 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Mont… Final Uruguay Argentina 4 2 H
## 2 1934 Italy Rome Final Italy Czechosl… 2 1 H
## 3 1938 France Colo… Final Hungary Italy 2 4 A
## 4 1954 Switzerl… Berne Final West Ger… Hungary 3 2 H
## 5 1958 Sweden Solna Final Sweden Brazil 2 5 A
## 6 1962 Chile Sant… Final Brazil Czechosl… 3 1 H
## 7 1966 England Lond… Final England West Ger… 4 2 H
## 8 1970 Mexico Mexi… Final Brazil Italy 4 1 H
## 9 1974 Germany Muni… Final West Ger… Netherla… 2 1 H
## 10 1978 Argentina Buen… Final Argentina Netherla… 3 1 H
## 11 1982 Spain Madr… Final Italy West Ger… 3 1 H
## 12 1986 Mexico Mexi… Final Argentina West Ger… 3 2 H
## 13 1990 Italy Rome Final West Ger… Argentina 1 0 H
## 14 1994 United S… Pasa… Final Brazil Italy 0 0 H
## 15 1998 France Sain… Final France Brazil 3 0 H
## 16 2002 Japan Yoko… Final Germany Brazil 0 2 A
## 17 2006 Germany Berl… Final Italy France 1 1 H
## 18 2010 South Af… Joha… Final Netherla… Spain 0 1 A
## 19 2014 Brazil Rio … Final Germany Argentina 1 0 H
## 20 2018 Russia Mosc… Final France Croatia 4 2 H
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
arrange(data, desc(year), desc(stage))
## # A tibble: 900 × 15
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 2018 Russia Saint … Thir… Belgium England 2 0 H
## 2 2018 Russia Saint … Semi… France Belgium 1 0 H
## 3 2018 Russia Moscow Semi… Croatia England 2 1 H
## 4 2018 Russia Kazan Roun… France Argentina 4 3 H
## 5 2018 Russia Sochi Roun… Uruguay Portugal 2 1 H
## 6 2018 Russia Moscow Roun… Russia Spain 1 1 H
## 7 2018 Russia Nizhny… Roun… Croatia Denmark 1 1 H
## 8 2018 Russia Samara Roun… Brazil Mexico 2 0 H
## 9 2018 Russia Rostov… Roun… Belgium Japan 3 2 H
## 10 2018 Russia Saint … Roun… Sweden Switzerl… 1 0 H
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## # losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>
select(data, year:city)
## # A tibble: 900 × 3
## year country city
## <dbl> <chr> <chr>
## 1 1930 Uruguay Montevideo
## 2 1930 Uruguay Montevideo
## 3 1930 Uruguay Montevideo
## 4 1930 Uruguay Montevideo
## 5 1930 Uruguay Montevideo
## 6 1930 Uruguay Montevideo
## 7 1930 Uruguay Montevideo
## 8 1930 Uruguay Montevideo
## 9 1930 Uruguay Montevideo
## 10 1930 Uruguay Montevideo
## # ℹ 890 more rows
mutate(data,
goal_diff = abs(home_score - away_score)) %>%
select(year, home_team, away_team, outcome, goal_diff)
## # A tibble: 900 × 5
## year home_team away_team outcome goal_diff
## <dbl> <chr> <chr> <chr> <dbl>
## 1 1930 France Mexico H 3
## 2 1930 Belgium United States A 3
## 3 1930 Brazil Yugoslavia A 1
## 4 1930 Peru Romania A 2
## 5 1930 Argentina France H 1
## 6 1930 Chile Mexico H 3
## 7 1930 Bolivia Yugoslavia A 4
## 8 1930 Paraguay United States A 3
## 9 1930 Uruguay Peru H 1
## 10 1930 Argentina Mexico H 3
## # ℹ 890 more rows
data %>%
group_by(outcome) %>%
summarise(
goal_diff = sum(home_score - away_score)
) %>%
arrange(outcome)
## # A tibble: 3 × 2
## outcome goal_diff
## <chr> <dbl>
## 1 A -537
## 2 D 0
## 3 H 813
```