Import your data

olympics <- read_excel("myData.xlsx")
olympics
## # A tibble: 271,116 × 15
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     1 A Dijia… M     24    180    80     China CHN   1992…  1992 Summer Barc…
##  2     2 A Lamusi M     23    170    60     China CHN   2012…  2012 Summer Lond…
##  3     3 Gunnar … M     24    NA     NA     Denm… DEN   1920…  1920 Summer Antw…
##  4     4 Edgar L… M     34    NA     NA     Denm… DEN   1900…  1900 Summer Paris
##  5     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  6     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  7     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  8     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  9     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## 10     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>

Pivoting

long to wide form

olympics_wide <- olympics %>%
  pivot_wider(names_from = season, values_from = medal)
olympics_wide
## # A tibble: 269,661 × 15
##       id name      sex   age   height weight team  noc   games  year city  sport
##    <dbl> <chr>     <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> <chr>
##  1     1 A Dijiang M     24    180    80     China CHN   1992…  1992 Barc… Bask…
##  2     2 A Lamusi  M     23    170    60     China CHN   2012…  2012 Lond… Judo 
##  3     3 Gunnar N… M     24    NA     NA     Denm… DEN   1920…  1920 Antw… Foot…
##  4     4 Edgar Li… M     34    NA     NA     Denm… DEN   1900…  1900 Paris Tug-…
##  5     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
##  6     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
##  7     5 Christin… F     25    185    82     Neth… NED   1992…  1992 Albe… Spee…
##  8     5 Christin… F     25    185    82     Neth… NED   1992…  1992 Albe… Spee…
##  9     5 Christin… F     27    185    82     Neth… NED   1994…  1994 Lill… Spee…
## 10     5 Christin… F     27    185    82     Neth… NED   1994…  1994 Lill… Spee…
## # ℹ 269,651 more rows
## # ℹ 3 more variables: event <chr>, Summer <list>, Winter <list>

wide to long form

olympics_wide %>%
  pivot_longer(cols = c(`Summer`, `Winter`),
               names_to = "season",
               values_to = "medal")
## # A tibble: 539,322 × 15
##       id name      sex   age   height weight team  noc   games  year city  sport
##    <dbl> <chr>     <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> <chr>
##  1     1 A Dijiang M     24    180    80     China CHN   1992…  1992 Barc… Bask…
##  2     1 A Dijiang M     24    180    80     China CHN   1992…  1992 Barc… Bask…
##  3     2 A Lamusi  M     23    170    60     China CHN   2012…  2012 Lond… Judo 
##  4     2 A Lamusi  M     23    170    60     China CHN   2012…  2012 Lond… Judo 
##  5     3 Gunnar N… M     24    NA     NA     Denm… DEN   1920…  1920 Antw… Foot…
##  6     3 Gunnar N… M     24    NA     NA     Denm… DEN   1920…  1920 Antw… Foot…
##  7     4 Edgar Li… M     34    NA     NA     Denm… DEN   1900…  1900 Paris Tug-…
##  8     4 Edgar Li… M     34    NA     NA     Denm… DEN   1900…  1900 Paris Tug-…
##  9     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
## 10     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
## # ℹ 539,312 more rows
## # ℹ 3 more variables: event <chr>, season <chr>, medal <list>

Separating and Uniting

Separate a column

olympics_sep <- olympics %>%
  separate(col = games, into = c("year_game", "season_game"))
olympics_sep
## # A tibble: 271,116 × 16
##       id name  sex   age   height weight team  noc   year_game season_game  year
##    <dbl> <chr> <chr> <chr> <chr>  <chr>  <chr> <chr> <chr>     <chr>       <dbl>
##  1     1 A Di… M     24    180    80     China CHN   1992      Summer       1992
##  2     2 A La… M     23    170    60     China CHN   2012      Summer       2012
##  3     3 Gunn… M     24    NA     NA     Denm… DEN   1920      Summer       1920
##  4     4 Edga… M     34    NA     NA     Denm… DEN   1900      Summer       1900
##  5     5 Chri… F     21    185    82     Neth… NED   1988      Winter       1988
##  6     5 Chri… F     21    185    82     Neth… NED   1988      Winter       1988
##  7     5 Chri… F     25    185    82     Neth… NED   1992      Winter       1992
##  8     5 Chri… F     25    185    82     Neth… NED   1992      Winter       1992
##  9     5 Chri… F     27    185    82     Neth… NED   1994      Winter       1994
## 10     5 Chri… F     27    185    82     Neth… NED   1994      Winter       1994
## # ℹ 271,106 more rows
## # ℹ 5 more variables: season <chr>, city <chr>, sport <chr>, event <chr>,
## #   medal <chr>

Unite two columns

olympics_sep %>%
  unite(col = "games", c(year_game, season_game), sep = " ")
## # A tibble: 271,116 × 15
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     1 A Dijia… M     24    180    80     China CHN   1992…  1992 Summer Barc…
##  2     2 A Lamusi M     23    170    60     China CHN   2012…  2012 Summer Lond…
##  3     3 Gunnar … M     24    NA     NA     Denm… DEN   1920…  1920 Summer Antw…
##  4     4 Edgar L… M     34    NA     NA     Denm… DEN   1900…  1900 Summer Paris
##  5     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  6     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  7     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  8     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  9     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## 10     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>

Missing Values

Hidden Missing Values

olympics %>%
  pivot_wider(names_from = season, values_from = medal)
## # A tibble: 269,661 × 15
##       id name      sex   age   height weight team  noc   games  year city  sport
##    <dbl> <chr>     <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> <chr>
##  1     1 A Dijiang M     24    180    80     China CHN   1992…  1992 Barc… Bask…
##  2     2 A Lamusi  M     23    170    60     China CHN   2012…  2012 Lond… Judo 
##  3     3 Gunnar N… M     24    NA     NA     Denm… DEN   1920…  1920 Antw… Foot…
##  4     4 Edgar Li… M     34    NA     NA     Denm… DEN   1900…  1900 Paris Tug-…
##  5     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
##  6     5 Christin… F     21    185    82     Neth… NED   1988…  1988 Calg… Spee…
##  7     5 Christin… F     25    185    82     Neth… NED   1992…  1992 Albe… Spee…
##  8     5 Christin… F     25    185    82     Neth… NED   1992…  1992 Albe… Spee…
##  9     5 Christin… F     27    185    82     Neth… NED   1994…  1994 Lill… Spee…
## 10     5 Christin… F     27    185    82     Neth… NED   1994…  1994 Lill… Spee…
## # ℹ 269,651 more rows
## # ℹ 3 more variables: event <chr>, Summer <list>, Winter <list>

Show Missing Values

olympics %>%
  complete(season, medal)
## # A tibble: 271,116 × 15
##    season medal     id name    sex   age   height weight team  noc   games  year
##    <chr>  <chr>  <dbl> <chr>   <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl>
##  1 Summer Bronze    15 Arvo O… M     30    NA     NA     Finl… FIN   1920…  1920
##  2 Summer Bronze    15 Arvo O… M     30    NA     NA     Finl… FIN   1920…  1920
##  3 Summer Bronze    17 Paavo … M     28    175    64     Finl… FIN   1948…  1948
##  4 Summer Bronze    17 Paavo … M     32    175    64     Finl… FIN   1952…  1952
##  5 Summer Bronze    29 Willem… F     22    NA     NA     Neth… NED   1988…  1988
##  6 Summer Bronze    37 Ann Kr… F     23    182    64     Norw… NOR   1996…  1996
##  7 Summer Bronze    62 Giovan… M     21    198    90     Italy ITA   2016…  2016
##  8 Summer Bronze    63 Jos Lu… M     30    194    87     Spain ESP   2008…  2008
##  9 Summer Bronze    65 Patima… F     21    165    49     Azer… AZE   2016…  2016
## 10 Summer Bronze    86 Jos Ma… M     26    182    67     Spain ESP   1984…  1984
## # ℹ 271,106 more rows
## # ℹ 3 more variables: city <chr>, sport <chr>, event <chr>

Fill missing ages

olympics %>%
  fill(age, .direction = "down")
## # A tibble: 271,116 × 15
##       id name     sex   age   height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1     1 A Dijia… M     24    180    80     China CHN   1992…  1992 Summer Barc…
##  2     2 A Lamusi M     23    170    60     China CHN   2012…  2012 Summer Lond…
##  3     3 Gunnar … M     24    NA     NA     Denm… DEN   1920…  1920 Summer Antw…
##  4     4 Edgar L… M     34    NA     NA     Denm… DEN   1900…  1900 Summer Paris
##  5     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  6     5 Christi… F     21    185    82     Neth… NED   1988…  1988 Winter Calg…
##  7     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  8     5 Christi… F     25    185    82     Neth… NED   1992…  1992 Winter Albe…
##  9     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## 10     5 Christi… F     27    185    82     Neth… NED   1994…  1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>