excel file

Pivoting

long to wide form

 myData %>%
   
     pivot_longer(cols = c(`goals`,`season`), 
                 names_to = "year",
                 values_to = "goals")
## # A tibble: 98,768 × 25
##    player       rank date                game_…¹ age   team  at    opp   locat…²
##    <chr>       <dbl> <dttm>                <dbl> <chr> <chr> <chr> <chr> <chr>  
##  1 Alex Ovech…     1 2005-10-05 00:00:00       1 20-0… WSH   NA    CBJ   Home   
##  2 Alex Ovech…     1 2005-10-05 00:00:00       1 20-0… WSH   NA    CBJ   Home   
##  3 Alex Ovech…     2 2005-10-07 00:00:00       2 20-0… WSH   NA    ATL   Home   
##  4 Alex Ovech…     2 2005-10-07 00:00:00       2 20-0… WSH   NA    ATL   Home   
##  5 Alex Ovech…     3 2005-10-08 00:00:00       3 20-0… WSH   @     ATL   Away   
##  6 Alex Ovech…     3 2005-10-08 00:00:00       3 20-0… WSH   @     ATL   Away   
##  7 Alex Ovech…     4 2005-10-10 00:00:00       4 20-0… WSH   NA    NYR   Home   
##  8 Alex Ovech…     4 2005-10-10 00:00:00       4 20-0… WSH   NA    NYR   Home   
##  9 Alex Ovech…     5 2005-10-12 00:00:00       5 20-0… WSH   @     CAR   Away   
## 10 Alex Ovech…     5 2005-10-12 00:00:00       5 20-0… WSH   @     CAR   Away   
## # … with 98,758 more rows, 16 more variables: outcome <chr>, assists <dbl>,
## #   points <dbl>, plus_minus <dbl>, penalty_min <dbl>, goals_even <dbl>,
## #   goals_powerplay <dbl>, goals_short <dbl>, goals_gamewinner <dbl>,
## #   assists_even <chr>, assists_powerplay <chr>, assists_short <chr>,
## #   shots <dbl>, shot_percent <chr>, year <chr>, goals <dbl>, and abbreviated
## #   variable names ¹​game_num, ²​location

wide to long form

Separating and Uniting

myData %>%
    
    separate(col = player, into =  c ("date","shots"))
## # A tibble: 49,384 × 24
##    date  shots    season  rank game_num age    team  at    opp   locat…¹ outcome
##    <chr> <chr>     <dbl> <dbl>    <dbl> <chr>  <chr> <chr> <chr> <chr>   <chr>  
##  1 Alex  Ovechkin   2006     1        1 20-018 WSH   NA    CBJ   Home    W      
##  2 Alex  Ovechkin   2006     2        2 20-020 WSH   NA    ATL   Home    L      
##  3 Alex  Ovechkin   2006     3        3 20-021 WSH   @     ATL   Away    L      
##  4 Alex  Ovechkin   2006     4        4 20-023 WSH   NA    NYR   Home    W      
##  5 Alex  Ovechkin   2006     5        5 20-025 WSH   @     CAR   Away    L      
##  6 Alex  Ovechkin   2006     6        6 20-026 WSH   NA    NYI   Home    L      
##  7 Alex  Ovechkin   2006     7        7 20-029 WSH   NA    TBL   Home    W      
##  8 Alex  Ovechkin   2006     8        8 20-033 WSH   @     FLA   Away    L      
##  9 Alex  Ovechkin   2006     9        9 20-035 WSH   NA    CAR   Home    L      
## 10 Alex  Ovechkin   2006    10       10 20-039 WSH   @     BUF   Away    W      
## # … with 49,374 more rows, 13 more variables: goals <dbl>, assists <dbl>,
## #   points <dbl>, plus_minus <dbl>, penalty_min <dbl>, goals_even <dbl>,
## #   goals_powerplay <dbl>, goals_short <dbl>, goals_gamewinner <dbl>,
## #   assists_even <chr>, assists_powerplay <chr>, assists_short <chr>,
## #   shot_percent <chr>, and abbreviated variable name ¹​location

Separate a column

myData %>%
    
    separate(col = player, into =  c ("date","shots"))
## # A tibble: 49,384 × 24
##    date  shots    season  rank game_num age    team  at    opp   locat…¹ outcome
##    <chr> <chr>     <dbl> <dbl>    <dbl> <chr>  <chr> <chr> <chr> <chr>   <chr>  
##  1 Alex  Ovechkin   2006     1        1 20-018 WSH   NA    CBJ   Home    W      
##  2 Alex  Ovechkin   2006     2        2 20-020 WSH   NA    ATL   Home    L      
##  3 Alex  Ovechkin   2006     3        3 20-021 WSH   @     ATL   Away    L      
##  4 Alex  Ovechkin   2006     4        4 20-023 WSH   NA    NYR   Home    W      
##  5 Alex  Ovechkin   2006     5        5 20-025 WSH   @     CAR   Away    L      
##  6 Alex  Ovechkin   2006     6        6 20-026 WSH   NA    NYI   Home    L      
##  7 Alex  Ovechkin   2006     7        7 20-029 WSH   NA    TBL   Home    W      
##  8 Alex  Ovechkin   2006     8        8 20-033 WSH   @     FLA   Away    L      
##  9 Alex  Ovechkin   2006     9        9 20-035 WSH   NA    CAR   Home    L      
## 10 Alex  Ovechkin   2006    10       10 20-039 WSH   @     BUF   Away    W      
## # … with 49,374 more rows, 13 more variables: goals <dbl>, assists <dbl>,
## #   points <dbl>, plus_minus <dbl>, penalty_min <dbl>, goals_even <dbl>,
## #   goals_powerplay <dbl>, goals_short <dbl>, goals_gamewinner <dbl>,
## #   assists_even <chr>, assists_powerplay <chr>, assists_short <chr>,
## #   shot_percent <chr>, and abbreviated variable name ¹​location

Unite two columns

myData %>%
    
    unite(col = "date",c(goals:shots), sep = "/",)
## # A tibble: 49,384 × 12
##    player     season  rank game_…¹ age   team  at    opp   locat…² outcome date 
##    <chr>       <dbl> <dbl>   <dbl> <chr> <chr> <chr> <chr> <chr>   <chr>   <chr>
##  1 Alex Ovec…   2006     1       1 20-0… WSH   NA    CBJ   Home    W       2/0/…
##  2 Alex Ovec…   2006     2       2 20-0… WSH   NA    ATL   Home    L       0/1/…
##  3 Alex Ovec…   2006     3       3 20-0… WSH   @     ATL   Away    L       0/1/…
##  4 Alex Ovec…   2006     4       4 20-0… WSH   NA    NYR   Home    W       1/0/…
##  5 Alex Ovec…   2006     5       5 20-0… WSH   @     CAR   Away    L       1/0/…
##  6 Alex Ovec…   2006     6       6 20-0… WSH   NA    NYI   Home    L       0/1/…
##  7 Alex Ovec…   2006     7       7 20-0… WSH   NA    TBL   Home    W       0/1/…
##  8 Alex Ovec…   2006     8       8 20-0… WSH   @     FLA   Away    L       2/0/…
##  9 Alex Ovec…   2006     9       9 20-0… WSH   NA    CAR   Home    L       0/0/…
## 10 Alex Ovec…   2006    10      10 20-0… WSH   @     BUF   Away    W       0/1/…
## # … with 49,374 more rows, 1 more variable: shot_percent <chr>, and abbreviated
## #   variable names ¹​game_num, ²​location

Missing Values

myData %>%
    
    pivot_wider(names_from = shots, values_from = goals)
## # A tibble: 49,384 × 39
##    player       season  rank date                game_…¹ age   team  at    opp  
##    <chr>         <dbl> <dbl> <dttm>                <dbl> <chr> <chr> <chr> <chr>
##  1 Alex Ovechk…   2006     1 2005-10-05 00:00:00       1 20-0… WSH   NA    CBJ  
##  2 Alex Ovechk…   2006     2 2005-10-07 00:00:00       2 20-0… WSH   NA    ATL  
##  3 Alex Ovechk…   2006     3 2005-10-08 00:00:00       3 20-0… WSH   @     ATL  
##  4 Alex Ovechk…   2006     4 2005-10-10 00:00:00       4 20-0… WSH   NA    NYR  
##  5 Alex Ovechk…   2006     5 2005-10-12 00:00:00       5 20-0… WSH   @     CAR  
##  6 Alex Ovechk…   2006     6 2005-10-13 00:00:00       6 20-0… WSH   NA    NYI  
##  7 Alex Ovechk…   2006     7 2005-10-16 00:00:00       7 20-0… WSH   NA    TBL  
##  8 Alex Ovechk…   2006     8 2005-10-20 00:00:00       8 20-0… WSH   @     FLA  
##  9 Alex Ovechk…   2006     9 2005-10-22 00:00:00       9 20-0… WSH   NA    CAR  
## 10 Alex Ovechk…   2006    10 2005-10-26 00:00:00      10 20-0… WSH   @     BUF  
## # … with 49,374 more rows, 30 more variables: location <chr>, outcome <chr>,
## #   assists <dbl>, points <dbl>, plus_minus <dbl>, penalty_min <dbl>,
## #   goals_even <dbl>, goals_powerplay <dbl>, goals_short <dbl>,
## #   goals_gamewinner <dbl>, assists_even <chr>, assists_powerplay <chr>,
## #   assists_short <chr>, shot_percent <chr>, `5` <dbl>, `1` <dbl>, `3` <dbl>,
## #   `6` <dbl>, `2` <dbl>, `10` <dbl>, `4` <dbl>, `7` <dbl>, `8` <dbl>,
## #   `9` <dbl>, `0` <dbl>, `11` <dbl>, `13` <dbl>, `12` <dbl>, `15` <dbl>, …
myData %>%
    
    pivot_wider(names_from = player, values_from = goals)
## # A tibble: 49,384 × 65
##    season  rank date                game_num age    team  at    opp   location
##     <dbl> <dbl> <dttm>                 <dbl> <chr>  <chr> <chr> <chr> <chr>   
##  1   2006     1 2005-10-05 00:00:00        1 20-018 WSH   NA    CBJ   Home    
##  2   2006     2 2005-10-07 00:00:00        2 20-020 WSH   NA    ATL   Home    
##  3   2006     3 2005-10-08 00:00:00        3 20-021 WSH   @     ATL   Away    
##  4   2006     4 2005-10-10 00:00:00        4 20-023 WSH   NA    NYR   Home    
##  5   2006     5 2005-10-12 00:00:00        5 20-025 WSH   @     CAR   Away    
##  6   2006     6 2005-10-13 00:00:00        6 20-026 WSH   NA    NYI   Home    
##  7   2006     7 2005-10-16 00:00:00        7 20-029 WSH   NA    TBL   Home    
##  8   2006     8 2005-10-20 00:00:00        8 20-033 WSH   @     FLA   Away    
##  9   2006     9 2005-10-22 00:00:00        9 20-035 WSH   NA    CAR   Home    
## 10   2006    10 2005-10-26 00:00:00       10 20-039 WSH   @     BUF   Away    
## # … with 49,374 more rows, and 56 more variables: outcome <chr>, assists <dbl>,
## #   points <dbl>, plus_minus <dbl>, penalty_min <dbl>, goals_even <dbl>,
## #   goals_powerplay <dbl>, goals_short <dbl>, goals_gamewinner <dbl>,
## #   assists_even <chr>, assists_powerplay <chr>, assists_short <chr>,
## #   shots <dbl>, shot_percent <chr>, `Alex Ovechkin` <dbl>,
## #   `Anze Kopitar` <dbl>, `Brad Marchand` <dbl>, `Brendan Shanahan` <dbl>,
## #   `Brett Hull` <dbl>, `Corey Perry` <dbl>, `Dave Andreychuk` <dbl>, …
myData %>%
    
    complete(player, goals)
## # A tibble: 49,438 × 25
##    player goals season  rank date                game_…¹ age   team  at    opp  
##    <chr>  <dbl>  <dbl> <dbl> <dttm>                <dbl> <chr> <chr> <chr> <chr>
##  1 Alex …     0   2006     2 2005-10-07 00:00:00       2 20-0… WSH   NA    ATL  
##  2 Alex …     0   2006     3 2005-10-08 00:00:00       3 20-0… WSH   @     ATL  
##  3 Alex …     0   2006     6 2005-10-13 00:00:00       6 20-0… WSH   NA    NYI  
##  4 Alex …     0   2006     7 2005-10-16 00:00:00       7 20-0… WSH   NA    TBL  
##  5 Alex …     0   2006     9 2005-10-22 00:00:00       9 20-0… WSH   NA    CAR  
##  6 Alex …     0   2006    10 2005-10-26 00:00:00      10 20-0… WSH   @     BUF  
##  7 Alex …     0   2006    12 2005-11-03 00:00:00      12 20-0… WSH   @     PHI  
##  8 Alex …     0   2006    13 2005-11-04 00:00:00      13 20-0… WSH   NA    ATL  
##  9 Alex …     0   2006    17 2005-11-12 00:00:00      17 20-0… WSH   @     NJD  
## 10 Alex …     0   2006    20 2005-11-19 00:00:00      20 20-0… WSH   @     MTL  
## # … with 49,428 more rows, 15 more variables: location <chr>, outcome <chr>,
## #   assists <dbl>, points <dbl>, plus_minus <dbl>, penalty_min <dbl>,
## #   goals_even <dbl>, goals_powerplay <dbl>, goals_short <dbl>,
## #   goals_gamewinner <dbl>, assists_even <chr>, assists_powerplay <chr>,
## #   assists_short <chr>, shots <dbl>, shot_percent <chr>, and abbreviated
## #   variable name ¹​game_num