Import your data
olympics <- read_excel("myData.xlsx")
olympics
## # A tibble: 271,116 × 15
## id name sex age height weight team noc games year season city
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 1 A Dijia… M 24 180 80 China CHN 1992… 1992 Summer Barc…
## 2 2 A Lamusi M 23 170 60 China CHN 2012… 2012 Summer Lond…
## 3 3 Gunnar … M 24 NA NA Denm… DEN 1920… 1920 Summer Antw…
## 4 4 Edgar L… M 34 NA NA Denm… DEN 1900… 1900 Summer Paris
## 5 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 6 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 7 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 8 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 9 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## 10 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>
Separating and Uniting
Separate a column
olympics_sep <- olympics %>%
separate(col = games, into = c("year_game", "season_game"))
olympics_sep
## # A tibble: 271,116 × 16
## id name sex age height weight team noc year_game season_game year
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1 A Di… M 24 180 80 China CHN 1992 Summer 1992
## 2 2 A La… M 23 170 60 China CHN 2012 Summer 2012
## 3 3 Gunn… M 24 NA NA Denm… DEN 1920 Summer 1920
## 4 4 Edga… M 34 NA NA Denm… DEN 1900 Summer 1900
## 5 5 Chri… F 21 185 82 Neth… NED 1988 Winter 1988
## 6 5 Chri… F 21 185 82 Neth… NED 1988 Winter 1988
## 7 5 Chri… F 25 185 82 Neth… NED 1992 Winter 1992
## 8 5 Chri… F 25 185 82 Neth… NED 1992 Winter 1992
## 9 5 Chri… F 27 185 82 Neth… NED 1994 Winter 1994
## 10 5 Chri… F 27 185 82 Neth… NED 1994 Winter 1994
## # ℹ 271,106 more rows
## # ℹ 5 more variables: season <chr>, city <chr>, sport <chr>, event <chr>,
## # medal <chr>
Unite two columns
olympics_sep %>%
unite(col = "games", c(year_game, season_game), sep = " ")
## # A tibble: 271,116 × 15
## id name sex age height weight team noc games year season city
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 1 A Dijia… M 24 180 80 China CHN 1992… 1992 Summer Barc…
## 2 2 A Lamusi M 23 170 60 China CHN 2012… 2012 Summer Lond…
## 3 3 Gunnar … M 24 NA NA Denm… DEN 1920… 1920 Summer Antw…
## 4 4 Edgar L… M 34 NA NA Denm… DEN 1900… 1900 Summer Paris
## 5 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 6 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 7 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 8 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 9 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## 10 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>
Missing Values
Hidden Missing Values
olympics %>%
pivot_wider(names_from = season, values_from = medal)
## # A tibble: 269,661 × 15
## id name sex age height weight team noc games year city sport
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 1 A Dijiang M 24 180 80 China CHN 1992… 1992 Barc… Bask…
## 2 2 A Lamusi M 23 170 60 China CHN 2012… 2012 Lond… Judo
## 3 3 Gunnar N… M 24 NA NA Denm… DEN 1920… 1920 Antw… Foot…
## 4 4 Edgar Li… M 34 NA NA Denm… DEN 1900… 1900 Paris Tug-…
## 5 5 Christin… F 21 185 82 Neth… NED 1988… 1988 Calg… Spee…
## 6 5 Christin… F 21 185 82 Neth… NED 1988… 1988 Calg… Spee…
## 7 5 Christin… F 25 185 82 Neth… NED 1992… 1992 Albe… Spee…
## 8 5 Christin… F 25 185 82 Neth… NED 1992… 1992 Albe… Spee…
## 9 5 Christin… F 27 185 82 Neth… NED 1994… 1994 Lill… Spee…
## 10 5 Christin… F 27 185 82 Neth… NED 1994… 1994 Lill… Spee…
## # ℹ 269,651 more rows
## # ℹ 3 more variables: event <chr>, Summer <list>, Winter <list>
Show Missing Values
olympics %>%
complete(season, medal)
## # A tibble: 271,116 × 15
## season medal id name sex age height weight team noc games year
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Summer Bronze 15 Arvo O… M 30 NA NA Finl… FIN 1920… 1920
## 2 Summer Bronze 15 Arvo O… M 30 NA NA Finl… FIN 1920… 1920
## 3 Summer Bronze 17 Paavo … M 28 175 64 Finl… FIN 1948… 1948
## 4 Summer Bronze 17 Paavo … M 32 175 64 Finl… FIN 1952… 1952
## 5 Summer Bronze 29 Willem… F 22 NA NA Neth… NED 1988… 1988
## 6 Summer Bronze 37 Ann Kr… F 23 182 64 Norw… NOR 1996… 1996
## 7 Summer Bronze 62 Giovan… M 21 198 90 Italy ITA 2016… 2016
## 8 Summer Bronze 63 Jos Lu… M 30 194 87 Spain ESP 2008… 2008
## 9 Summer Bronze 65 Patima… F 21 165 49 Azer… AZE 2016… 2016
## 10 Summer Bronze 86 Jos Ma… M 26 182 67 Spain ESP 1984… 1984
## # ℹ 271,106 more rows
## # ℹ 3 more variables: city <chr>, sport <chr>, event <chr>
Fill missing ages
olympics %>%
fill(age, .direction = "down")
## # A tibble: 271,116 × 15
## id name sex age height weight team noc games year season city
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 1 A Dijia… M 24 180 80 China CHN 1992… 1992 Summer Barc…
## 2 2 A Lamusi M 23 170 60 China CHN 2012… 2012 Summer Lond…
## 3 3 Gunnar … M 24 NA NA Denm… DEN 1920… 1920 Summer Antw…
## 4 4 Edgar L… M 34 NA NA Denm… DEN 1900… 1900 Summer Paris
## 5 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 6 5 Christi… F 21 185 82 Neth… NED 1988… 1988 Winter Calg…
## 7 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 8 5 Christi… F 25 185 82 Neth… NED 1992… 1992 Winter Albe…
## 9 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## 10 5 Christi… F 27 185 82 Neth… NED 1994… 1994 Winter Lill…
## # ℹ 271,106 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>