Import your data

# excel filer
games <- read_excel("../00_data/MyData_charts.xlsx")
games
## # A tibble: 988 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Filter

games_short <- games %>%
    select( year, home_team, home_score, away_team, away_score, winning_team, date) %>%
    filter(year > 2013, home_score > 1, away_score > 1)
games_short
## # A tibble: 15 × 7
##     year home_team     home_score away_team   away_score winning_team
##    <dbl> <chr>              <dbl> <chr>            <dbl> <chr>       
##  1  2014 Australia              2 Netherlands          3 Netherlands 
##  2  2014 Switzerland            2 France               5 France      
##  3  2014 Germany                2 Ghana                2 NA          
##  4  2014 South Korea            2 Algeria              4 Algeria     
##  5  2014 United States          2 Portugal             2 NA          
##  6  2014 Nigeria                2 Argentina            3 Argentina   
##  7  2018 Portugal               3 Spain                3 NA          
##  8  2018 Belgium                5 Tunisia              2 Belgium     
##  9  2018 Japan                  2 Senegal              2 NA          
## 10  2018 Spain                  2 Morocco              2 NA          
## 11  2018 Switzerland            2 Costa Rica           2 NA          
## 12  2018 France                 4 Argentina            3 France      
## 13  2018 Belgium                3 Japan                2 Belgium     
## 14  2018 Russia                 2 Croatia              2 Croatia     
## 15  2018 France                 4 Croatia              2 France      
## # ℹ 1 more variable: date <dttm>

Pivoting

Long form

games_long <- games_short %>%
    pivot_longer(cols = c(`home_score`, `away_score`),
                 names_to = "home_or_away", 
                 values_to = "score")
games_long
## # A tibble: 30 × 7
##     year home_team away_team winning_team date                home_or_away score
##    <dbl> <chr>     <chr>     <chr>        <dttm>              <chr>        <dbl>
##  1  2014 Australia Netherla… Netherlands  2014-06-18 00:00:00 home_score       2
##  2  2014 Australia Netherla… Netherlands  2014-06-18 00:00:00 away_score       3
##  3  2014 Switzerl… France    France       2014-06-20 00:00:00 home_score       2
##  4  2014 Switzerl… France    France       2014-06-20 00:00:00 away_score       5
##  5  2014 Germany   Ghana     NA           2014-06-21 00:00:00 home_score       2
##  6  2014 Germany   Ghana     NA           2014-06-21 00:00:00 away_score       2
##  7  2014 South Ko… Algeria   Algeria      2014-06-22 00:00:00 home_score       2
##  8  2014 South Ko… Algeria   Algeria      2014-06-22 00:00:00 away_score       4
##  9  2014 United S… Portugal  NA           2014-06-22 00:00:00 home_score       2
## 10  2014 United S… Portugal  NA           2014-06-22 00:00:00 away_score       2
## # ℹ 20 more rows

Wide form

games_wide <- games_long %>%
    pivot_wider(names_from = home_or_away, values_from = score)
games_wide
## # A tibble: 15 × 7
##     year home_team     away_team   winning_team date                home_score
##    <dbl> <chr>         <chr>       <chr>        <dttm>                   <dbl>
##  1  2014 Australia     Netherlands Netherlands  2014-06-18 00:00:00          2
##  2  2014 Switzerland   France      France       2014-06-20 00:00:00          2
##  3  2014 Germany       Ghana       NA           2014-06-21 00:00:00          2
##  4  2014 South Korea   Algeria     Algeria      2014-06-22 00:00:00          2
##  5  2014 United States Portugal    NA           2014-06-22 00:00:00          2
##  6  2014 Nigeria       Argentina   Argentina    2014-06-25 00:00:00          2
##  7  2018 Portugal      Spain       NA           2018-06-15 00:00:00          3
##  8  2018 Belgium       Tunisia     Belgium      2018-06-23 00:00:00          5
##  9  2018 Japan         Senegal     NA           2018-06-24 00:00:00          2
## 10  2018 Spain         Morocco     NA           2018-06-25 00:00:00          2
## 11  2018 Switzerland   Costa Rica  NA           2018-06-27 00:00:00          2
## 12  2018 France        Argentina   France       2018-06-30 00:00:00          4
## 13  2018 Belgium       Japan       Belgium      2018-07-02 00:00:00          3
## 14  2018 Russia        Croatia     Croatia      2018-07-07 00:00:00          2
## 15  2018 France        Croatia     France       2018-07-15 00:00:00          4
## # ℹ 1 more variable: away_score <dbl>

Separating and Uniting

Separate a column

games_sep <- games_short %>%
    
    separate(col = date, into = c("year", "month", "day"))
games_sep
## # A tibble: 15 × 8
##    home_team     home_score away_team  away_score winning_team year  month day  
##    <chr>              <dbl> <chr>           <dbl> <chr>        <chr> <chr> <chr>
##  1 Australia              2 Netherlan…          3 Netherlands  2014  06    18   
##  2 Switzerland            2 France              5 France       2014  06    20   
##  3 Germany                2 Ghana               2 NA           2014  06    21   
##  4 South Korea            2 Algeria             4 Algeria      2014  06    22   
##  5 United States          2 Portugal            2 NA           2014  06    22   
##  6 Nigeria                2 Argentina           3 Argentina    2014  06    25   
##  7 Portugal               3 Spain               3 NA           2018  06    15   
##  8 Belgium                5 Tunisia             2 Belgium      2018  06    23   
##  9 Japan                  2 Senegal             2 NA           2018  06    24   
## 10 Spain                  2 Morocco             2 NA           2018  06    25   
## 11 Switzerland            2 Costa Rica          2 NA           2018  06    27   
## 12 France                 4 Argentina           3 France       2018  06    30   
## 13 Belgium                3 Japan               2 Belgium      2018  07    02   
## 14 Russia                 2 Croatia             2 Croatia      2018  07    07   
## 15 France                 4 Croatia             2 France       2018  07    15

Unite two columns

games_unite <- games_sep %>%
    
    unite(col = "date", c(year,day), sep = "-")
games_unite
## # A tibble: 15 × 7
##    home_team     home_score away_team   away_score winning_team date    month
##    <chr>              <dbl> <chr>            <dbl> <chr>        <chr>   <chr>
##  1 Australia              2 Netherlands          3 Netherlands  2014-18 06   
##  2 Switzerland            2 France               5 France       2014-20 06   
##  3 Germany                2 Ghana                2 NA           2014-21 06   
##  4 South Korea            2 Algeria              4 Algeria      2014-22 06   
##  5 United States          2 Portugal             2 NA           2014-22 06   
##  6 Nigeria                2 Argentina            3 Argentina    2014-25 06   
##  7 Portugal               3 Spain                3 NA           2018-15 06   
##  8 Belgium                5 Tunisia              2 Belgium      2018-23 06   
##  9 Japan                  2 Senegal              2 NA           2018-24 06   
## 10 Spain                  2 Morocco              2 NA           2018-25 06   
## 11 Switzerland            2 Costa Rica           2 NA           2018-27 06   
## 12 France                 4 Argentina            3 France       2018-30 06   
## 13 Belgium                3 Japan                2 Belgium      2018-02 07   
## 14 Russia                 2 Croatia              2 Croatia      2018-07 07   
## 15 France                 4 Croatia              2 France       2018-15 07

Missing Values

games_short %>%
    
    complete(year, winning_team) %>%
    select(year, winning_team, home_team, away_team) %>%
    
    arrange(desc(year))
## # A tibble: 20 × 4
##     year winning_team home_team     away_team  
##    <dbl> <chr>        <chr>         <chr>      
##  1  2018 Algeria      <NA>          <NA>       
##  2  2018 Argentina    <NA>          <NA>       
##  3  2018 Belgium      Belgium       Tunisia    
##  4  2018 Belgium      Belgium       Japan      
##  5  2018 Croatia      Russia        Croatia    
##  6  2018 France       France        Argentina  
##  7  2018 France       France        Croatia    
##  8  2018 NA           Portugal      Spain      
##  9  2018 NA           Japan         Senegal    
## 10  2018 NA           Spain         Morocco    
## 11  2018 NA           Switzerland   Costa Rica 
## 12  2018 Netherlands  <NA>          <NA>       
## 13  2014 Algeria      South Korea   Algeria    
## 14  2014 Argentina    Nigeria       Argentina  
## 15  2014 Belgium      <NA>          <NA>       
## 16  2014 Croatia      <NA>          <NA>       
## 17  2014 France       Switzerland   France     
## 18  2014 NA           Germany       Ghana      
## 19  2014 NA           United States Portugal   
## 20  2014 Netherlands  Australia     Netherlands