Import data

# excel file
data <- read_excel("../00_data/MyData.xlsx")

data
## # A tibble: 900 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Apply the following dplyr verbs to your data

Filter rows

filter(data, stage == "Final")
## # A tibble: 20 × 15
##     year country   city  stage home_team away_team home_score away_score outcome
##    <dbl> <chr>     <chr> <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay   Mont… Final Uruguay   Argentina          4          2 H      
##  2  1934 Italy     Rome  Final Italy     Czechosl…          2          1 H      
##  3  1938 France    Colo… Final Hungary   Italy              2          4 A      
##  4  1954 Switzerl… Berne Final West Ger… Hungary            3          2 H      
##  5  1958 Sweden    Solna Final Sweden    Brazil             2          5 A      
##  6  1962 Chile     Sant… Final Brazil    Czechosl…          3          1 H      
##  7  1966 England   Lond… Final England   West Ger…          4          2 H      
##  8  1970 Mexico    Mexi… Final Brazil    Italy              4          1 H      
##  9  1974 Germany   Muni… Final West Ger… Netherla…          2          1 H      
## 10  1978 Argentina Buen… Final Argentina Netherla…          3          1 H      
## 11  1982 Spain     Madr… Final Italy     West Ger…          3          1 H      
## 12  1986 Mexico    Mexi… Final Argentina West Ger…          3          2 H      
## 13  1990 Italy     Rome  Final West Ger… Argentina          1          0 H      
## 14  1994 United S… Pasa… Final Brazil    Italy              0          0 H      
## 15  1998 France    Sain… Final France    Brazil             3          0 H      
## 16  2002 Japan     Yoko… Final Germany   Brazil             0          2 A      
## 17  2006 Germany   Berl… Final Italy     France             1          1 H      
## 18  2010 South Af… Joha… Final Netherla… Spain              0          1 A      
## 19  2014 Brazil    Rio … Final Germany   Argentina          1          0 H      
## 20  2018 Russia    Mosc… Final France    Croatia            4          2 H      
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Arrange rows

arrange(data, desc(year), desc(stage))
## # A tibble: 900 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  2018 Russia  Saint … Thir… Belgium   England            2          0 H      
##  2  2018 Russia  Saint … Semi… France    Belgium            1          0 H      
##  3  2018 Russia  Moscow  Semi… Croatia   England            2          1 H      
##  4  2018 Russia  Kazan   Roun… France    Argentina          4          3 H      
##  5  2018 Russia  Sochi   Roun… Uruguay   Portugal           2          1 H      
##  6  2018 Russia  Moscow  Roun… Russia    Spain              1          1 H      
##  7  2018 Russia  Nizhny… Roun… Croatia   Denmark            1          1 H      
##  8  2018 Russia  Samara  Roun… Brazil    Mexico             2          0 H      
##  9  2018 Russia  Rostov… Roun… Belgium   Japan              3          2 H      
## 10  2018 Russia  Saint … Roun… Sweden    Switzerl…          1          0 H      
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Select columns

select(data, year:city)
## # A tibble: 900 × 3
##     year country city      
##    <dbl> <chr>   <chr>     
##  1  1930 Uruguay Montevideo
##  2  1930 Uruguay Montevideo
##  3  1930 Uruguay Montevideo
##  4  1930 Uruguay Montevideo
##  5  1930 Uruguay Montevideo
##  6  1930 Uruguay Montevideo
##  7  1930 Uruguay Montevideo
##  8  1930 Uruguay Montevideo
##  9  1930 Uruguay Montevideo
## 10  1930 Uruguay Montevideo
## # ℹ 890 more rows

Add columns

mutate(data,
       goal_diff = abs(home_score - away_score)) %>%
    
    select(year, home_team, away_team, outcome, goal_diff)
## # A tibble: 900 × 5
##     year home_team away_team     outcome goal_diff
##    <dbl> <chr>     <chr>         <chr>       <dbl>
##  1  1930 France    Mexico        H               3
##  2  1930 Belgium   United States A               3
##  3  1930 Brazil    Yugoslavia    A               1
##  4  1930 Peru      Romania       A               2
##  5  1930 Argentina France        H               1
##  6  1930 Chile     Mexico        H               3
##  7  1930 Bolivia   Yugoslavia    A               4
##  8  1930 Paraguay  United States A               3
##  9  1930 Uruguay   Peru          H               1
## 10  1930 Argentina Mexico        H               3
## # ℹ 890 more rows

Summarize by groups

data %>%
    
    group_by(outcome) %>%
    
    summarise(
        goal_diff = sum(home_score - away_score)
    ) %>%
    
    arrange(outcome)
## # A tibble: 3 × 2
##   outcome goal_diff
##   <chr>       <dbl>
## 1 A            -537
## 2 D               0
## 3 H             813

```