Import your data

data <- read_excel("../00_data/MyData.xlsx")
data
## # A tibble: 900 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Pivoting

long to wide form

data_long <- data %>%
    pivot_longer(cols = c('home_score', 'away_score'),
                 names_to = "home_or_away",
                 values_to = "team_score")
data_long
## # A tibble: 1,800 × 15
##     year country city       stage   home_team away_team   outcome win_conditions
##    <dbl> <chr>   <chr>      <chr>   <chr>     <chr>       <chr>   <chr>         
##  1  1930 Uruguay Montevideo Group 1 France    Mexico      H       <NA>          
##  2  1930 Uruguay Montevideo Group 1 France    Mexico      H       <NA>          
##  3  1930 Uruguay Montevideo Group 4 Belgium   United Sta… A       <NA>          
##  4  1930 Uruguay Montevideo Group 4 Belgium   United Sta… A       <NA>          
##  5  1930 Uruguay Montevideo Group 2 Brazil    Yugoslavia  A       <NA>          
##  6  1930 Uruguay Montevideo Group 2 Brazil    Yugoslavia  A       <NA>          
##  7  1930 Uruguay Montevideo Group 3 Peru      Romania     A       <NA>          
##  8  1930 Uruguay Montevideo Group 3 Peru      Romania     A       <NA>          
##  9  1930 Uruguay Montevideo Group 1 Argentina France      H       <NA>          
## 10  1930 Uruguay Montevideo Group 1 Argentina France      H       <NA>          
## # ℹ 1,790 more rows
## # ℹ 7 more variables: winning_team <chr>, losing_team <chr>, date <dttm>,
## #   month <chr>, dayofweek <chr>, home_or_away <chr>, team_score <dbl>

wide to long form

data_wide <- data_long %>%
    pivot_wider(names_from = home_or_away,
                values_from = team_score)
data_wide
## # A tibble: 900 × 15
##     year country city       stage   home_team away_team   outcome win_conditions
##    <dbl> <chr>   <chr>      <chr>   <chr>     <chr>       <chr>   <chr>         
##  1  1930 Uruguay Montevideo Group 1 France    Mexico      H       <NA>          
##  2  1930 Uruguay Montevideo Group 4 Belgium   United Sta… A       <NA>          
##  3  1930 Uruguay Montevideo Group 2 Brazil    Yugoslavia  A       <NA>          
##  4  1930 Uruguay Montevideo Group 3 Peru      Romania     A       <NA>          
##  5  1930 Uruguay Montevideo Group 1 Argentina France      H       <NA>          
##  6  1930 Uruguay Montevideo Group 1 Chile     Mexico      H       <NA>          
##  7  1930 Uruguay Montevideo Group 2 Bolivia   Yugoslavia  A       <NA>          
##  8  1930 Uruguay Montevideo Group 4 Paraguay  United Sta… A       <NA>          
##  9  1930 Uruguay Montevideo Group 3 Uruguay   Peru        H       <NA>          
## 10  1930 Uruguay Montevideo Group 1 Argentina Mexico      H       <NA>          
## # ℹ 890 more rows
## # ℹ 7 more variables: winning_team <chr>, losing_team <chr>, date <dttm>,
## #   month <chr>, dayofweek <chr>, home_score <dbl>, away_score <dbl>

Separating and Uniting

Separate a column

data_sep <- data %>%
    separate(col = date, into = c("year", "month", "day"))

data_sep
## # A tibble: 900 × 15
##    country city       stage   home_team away_team  home_score away_score outcome
##    <chr>   <chr>      <chr>   <chr>     <chr>           <dbl>      <dbl> <chr>  
##  1 Uruguay Montevideo Group 1 France    Mexico              4          1 H      
##  2 Uruguay Montevideo Group 4 Belgium   United St…          0          3 A      
##  3 Uruguay Montevideo Group 2 Brazil    Yugoslavia          1          2 A      
##  4 Uruguay Montevideo Group 3 Peru      Romania             1          3 A      
##  5 Uruguay Montevideo Group 1 Argentina France              1          0 H      
##  6 Uruguay Montevideo Group 1 Chile     Mexico              3          0 H      
##  7 Uruguay Montevideo Group 2 Bolivia   Yugoslavia          0          4 A      
##  8 Uruguay Montevideo Group 4 Paraguay  United St…          0          3 A      
##  9 Uruguay Montevideo Group 3 Uruguay   Peru                1          0 H      
## 10 Uruguay Montevideo Group 1 Argentina Mexico              6          3 H      
## # ℹ 890 more rows
## # ℹ 7 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, year <chr>, month <chr>, day <chr>, dayofweek <chr>

Unite two columns

data_unite <- data_sep %>%
    unite(col = "date", c(year, month, day), sep = "-")

data_unite
## # A tibble: 900 × 13
##    country city       stage   home_team away_team  home_score away_score outcome
##    <chr>   <chr>      <chr>   <chr>     <chr>           <dbl>      <dbl> <chr>  
##  1 Uruguay Montevideo Group 1 France    Mexico              4          1 H      
##  2 Uruguay Montevideo Group 4 Belgium   United St…          0          3 A      
##  3 Uruguay Montevideo Group 2 Brazil    Yugoslavia          1          2 A      
##  4 Uruguay Montevideo Group 3 Peru      Romania             1          3 A      
##  5 Uruguay Montevideo Group 1 Argentina France              1          0 H      
##  6 Uruguay Montevideo Group 1 Chile     Mexico              3          0 H      
##  7 Uruguay Montevideo Group 2 Bolivia   Yugoslavia          0          4 A      
##  8 Uruguay Montevideo Group 4 Paraguay  United St…          0          3 A      
##  9 Uruguay Montevideo Group 3 Uruguay   Peru                1          0 H      
## 10 Uruguay Montevideo Group 1 Argentina Mexico              6          3 H      
## # ℹ 890 more rows
## # ℹ 5 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <chr>, dayofweek <chr>

Missing Values

data %>%
    complete(year, winning_team) %>%
    select(year, stage, winning_team, home_team, away_team) %>%
    
    arrange(stage)
## # A tibble: 1,942 × 5
##     year stage winning_team home_team    away_team     
##    <dbl> <chr> <chr>        <chr>        <chr>         
##  1  1930 Final Uruguay      Uruguay      Argentina     
##  2  1934 Final Italy        Italy        Czechoslovakia
##  3  1938 Final Italy        Hungary      Italy         
##  4  1954 Final West Germany West Germany Hungary       
##  5  1958 Final Brazil       Sweden       Brazil        
##  6  1962 Final Brazil       Brazil       Czechoslovakia
##  7  1966 Final England      England      West Germany  
##  8  1970 Final Brazil       Brazil       Italy         
##  9  1974 Final West Germany West Germany Netherlands   
## 10  1978 Final Argentina    Argentina    Netherlands   
## # ℹ 1,932 more rows