Tidy data
data_small <- data %>%
select(birth_country, first_name,last_name)
data_small
## # A tibble: 8,474 × 3
## birth_country first_name last_name
## <chr> <chr> <chr>
## 1 CAN Bryan Adams
## 2 CAN Donald Audette
## 3 CAN Eric Bertrand
## 4 CAN Jason Botterill
## 5 CAN Andrew Brunette
## 6 CAN Kelly Buchberger
## 7 CAN Hnat Domenichelli
## 8 CAN Shean Donovan
## 9 CAN Nelson Emerson
## 10 CAN Ray Ferraro
## # ℹ 8,464 more rows
Separating and Uniting
Separate a column
data_long
## # A tibble: 16,948 × 3
## birth_country name last_name
## <chr> <chr> <chr>
## 1 CAN first_name Bryan
## 2 CAN last_name Adams
## 3 CAN first_name Donald
## 4 CAN last_name Audette
## 5 CAN first_name Eric
## 6 CAN last_name Bertrand
## 7 CAN first_name Jason
## 8 CAN last_name Botterill
## 9 CAN first_name Andrew
## 10 CAN last_name Brunette
## # ℹ 16,938 more rows
data_long_sep <- data_long %>%
separate(col = name, into = c("birth_country", "name"))
data_long_sep
## # A tibble: 16,948 × 3
## birth_country name last_name
## <chr> <chr> <chr>
## 1 first name Bryan
## 2 last name Adams
## 3 first name Donald
## 4 last name Audette
## 5 first name Eric
## 6 last name Bertrand
## 7 first name Jason
## 8 last name Botterill
## 9 first name Andrew
## 10 last name Brunette
## # ℹ 16,938 more rows
Unite two columns
data_long_sep %>%
unite(col = "W", c(birth_country,last_name), sep = "/", )
## # A tibble: 16,948 × 2
## W name
## <chr> <chr>
## 1 first/Bryan name
## 2 last/Adams name
## 3 first/Donald name
## 4 last/Audette name
## 5 first/Eric name
## 6 last/Bertrand name
## 7 first/Jason name
## 8 last/Botterill name
## 9 first/Andrew name
## 10 last/Brunette name
## # ℹ 16,938 more rows
Missing Values
data_small %>%
pivot_wider(names_from = birth_country, values_from = last_name)
## # A tibble: 1,411 × 47
## first_name CAN SWE CZE USA LVA RUS NGA FIN SVK
## <chr> <list> <list> <list> <list> <list> <list> <list> <list> <list>
## 1 Bryan <chr [19]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 2 Donald <chr [5]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 3 Eric <chr [33]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 4 Jason <chr [39]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 5 Andrew <chr [24]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 6 Kelly <chr [6]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 7 Hnat <chr [1]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 8 Shean <chr [1]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 9 Nelson <chr [4]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 10 Ray <chr [24]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## # ℹ 1,401 more rows
## # ℹ 37 more variables: DEU <list>, GBR <list>, UKR <list>, KAZ <list>,
## # POL <list>, PRY <list>, CHE <list>, FRA <list>, VEN <list>, NLD <list>,
## # SRB <list>, HTI <list>, LTU <list>, BLR <list>, AUT <list>, IRL <list>,
## # LBN <list>, DNK <list>, ITA <list>, NOR <list>, SVN <list>, JAM <list>,
## # KOR <list>, BRN <list>, ZAF <list>, BEL <list>, BHS <list>, EST <list>,
## # BRA <list>, IDN <list>, TWN <list>, JPN <list>, UZB <list>, AUS <list>, …
data_small %>%
pivot_wider(names_from = birth_country, values_from = last_name)
## # A tibble: 1,411 × 47
## first_name CAN SWE CZE USA LVA RUS NGA FIN SVK
## <chr> <list> <list> <list> <list> <list> <list> <list> <list> <list>
## 1 Bryan <chr [19]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 2 Donald <chr [5]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 3 Eric <chr [33]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 4 Jason <chr [39]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 5 Andrew <chr [24]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 6 Kelly <chr [6]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## 7 Hnat <chr [1]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 8 Shean <chr [1]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 9 Nelson <chr [4]> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL>
## 10 Ray <chr [24]> <NULL> <NULL> <chr> <NULL> <NULL> <NULL> <NULL> <NULL>
## # ℹ 1,401 more rows
## # ℹ 37 more variables: DEU <list>, GBR <list>, UKR <list>, KAZ <list>,
## # POL <list>, PRY <list>, CHE <list>, FRA <list>, VEN <list>, NLD <list>,
## # SRB <list>, HTI <list>, LTU <list>, BLR <list>, AUT <list>, IRL <list>,
## # LBN <list>, DNK <list>, ITA <list>, NOR <list>, SVN <list>, JAM <list>,
## # KOR <list>, BRN <list>, ZAF <list>, BEL <list>, BHS <list>, EST <list>,
## # BRA <list>, IDN <list>, TWN <list>, JPN <list>, UZB <list>, AUS <list>, …
data_small %>%
complete(birth_country, last_name)
## # A tibble: 263,303 × 3
## birth_country last_name first_name
## <chr> <chr> <chr>
## 1 AUS Aalto <NA>
## 2 AUS Abbott <NA>
## 3 AUS Abdelkader <NA>
## 4 AUS Abel <NA>
## 5 AUS Aberg <NA>
## 6 AUS Abgrall <NA>
## 7 AUS Abid <NA>
## 8 AUS Abrahamsson <NA>
## 9 AUS Abramov <NA>
## 10 AUS Abruzzese <NA>
## # ℹ 263,293 more rows