Import your data

# excel filer
games <- read_excel("../00_data/MyData_charts.xlsx")
games
## # A tibble: 988 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Pivoting

long to wide form

games_long <- games %>%
    pivot_longer(cols = c(`home_score`, `away_score`),
                 names_to = "hometeam", 
                 values_to = "awayteam")

wide to long form

games_long %>%
    pivot_wider(names_from = hometeam, values_from = awayteam)
## Warning: Values from `awayteam` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
##   {data} |>
##   dplyr::summarise(n = dplyr::n(), .by = c(year, country, city, stage,
##   home_team, away_team, outcome, win_conditions, winning_team, losing_team,
##   date, month, dayofweek, hometeam)) |>
##   dplyr::filter(n > 1L)
## # A tibble: 901 × 15
##     year country city       stage   home_team away_team   outcome win_conditions
##    <dbl> <chr>   <chr>      <chr>   <chr>     <chr>       <chr>   <chr>         
##  1  1930 Uruguay Montevideo Group 1 France    Mexico      H       <NA>          
##  2  1930 Uruguay Montevideo Group 4 Belgium   United Sta… A       <NA>          
##  3  1930 Uruguay Montevideo Group 2 Brazil    Yugoslavia  A       <NA>          
##  4  1930 Uruguay Montevideo Group 3 Peru      Romania     A       <NA>          
##  5  1930 Uruguay Montevideo Group 1 Argentina France      H       <NA>          
##  6  1930 Uruguay Montevideo Group 1 Chile     Mexico      H       <NA>          
##  7  1930 Uruguay Montevideo Group 2 Bolivia   Yugoslavia  A       <NA>          
##  8  1930 Uruguay Montevideo Group 4 Paraguay  United Sta… A       <NA>          
##  9  1930 Uruguay Montevideo Group 3 Uruguay   Peru        H       <NA>          
## 10  1930 Uruguay Montevideo Group 1 Argentina Mexico      H       <NA>          
## # ℹ 891 more rows
## # ℹ 7 more variables: winning_team <chr>, losing_team <chr>, date <dttm>,
## #   month <chr>, dayofweek <chr>, home_score <list>, away_score <list>

Separating and Uniting

Separate a column

games_sep <- games %>%
    
    separate(col = date, into = c("year", "month", "day"))

Unite two columns

games_unite <- games_sep %>%
    
    unite(col = "date", year:day, sep = "-")

games_sep %>%
    
    unite(col = "rate", c(year,day), sep = "-")
## # A tibble: 988 × 14
##    country city       stage   home_team away_team  home_score away_score outcome
##    <chr>   <chr>      <chr>   <chr>     <chr>           <dbl>      <dbl> <chr>  
##  1 Uruguay Montevideo Group 1 France    Mexico              4          1 H      
##  2 Uruguay Montevideo Group 4 Belgium   United St…          0          3 A      
##  3 Uruguay Montevideo Group 2 Brazil    Yugoslavia          1          2 A      
##  4 Uruguay Montevideo Group 3 Peru      Romania             1          3 A      
##  5 Uruguay Montevideo Group 1 Argentina France              1          0 H      
##  6 Uruguay Montevideo Group 1 Chile     Mexico              3          0 H      
##  7 Uruguay Montevideo Group 2 Bolivia   Yugoslavia          0          4 A      
##  8 Uruguay Montevideo Group 4 Paraguay  United St…          0          3 A      
##  9 Uruguay Montevideo Group 3 Uruguay   Peru                1          0 H      
## 10 Uruguay Montevideo Group 1 Argentina Mexico              6          3 H      
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, rate <chr>, month <chr>, dayofweek <chr>

Missing Values

games %>%
    
    complete(year, outcome)
## # A tibble: 1,013 × 15
##     year outcome country city    stage home_team away_team home_score away_score
##    <dbl> <chr>   <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl>
##  1  1930 A       Uruguay Montev… Grou… Belgium   United S…          0          3
##  2  1930 A       Uruguay Montev… Grou… Brazil    Yugoslav…          1          2
##  3  1930 A       Uruguay Montev… Grou… Peru      Romania            1          3
##  4  1930 A       Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4
##  5  1930 A       Uruguay Montev… Grou… Paraguay  United S…          0          3
##  6  1930 A       Uruguay Montev… Grou… Belgium   Paraguay           0          1
##  7  1930 A       Uruguay Montev… Grou… Bolivia   Brazil             0          4
##  8  1930 D       <NA>    <NA>    <NA>  <NA>      <NA>              NA         NA
##  9  1930 H       Uruguay Montev… Grou… France    Mexico             4          1
## 10  1930 H       Uruguay Montev… Grou… Argentina France             1          0
## # ℹ 1,003 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>