Import your data
data <- read_excel("../00_data/myData.xlsx", sheet = "nhl_rosters")
data
## # A tibble: 54,883 × 18
## team_code season position_type player_id headshot first_name last_name
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 ATL 19992000 forwards 8467867 https://asse… Bryan Adams
## 2 ATL 19992000 forwards 8445176 https://asse… Donald Audette
## 3 ATL 19992000 forwards 8460014 https://asse… Eric Bertrand
## 4 ATL 19992000 forwards 8460510 https://asse… Jason Botterill
## 5 ATL 19992000 forwards 8459596 https://asse… Andrew Brunette
## 6 ATL 19992000 forwards 8445733 https://asse… Kelly Buchberg…
## 7 ATL 19992000 forwards 8460573 https://asse… Hnat Domenich…
## 8 ATL 19992000 forwards 8459450 https://asse… Shean Donovan
## 9 ATL 19992000 forwards 8446675 https://asse… Nelson Emerson
## 10 ATL 19992000 forwards 8446823 https://asse… Ray Ferraro
## # ℹ 54,873 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## # shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## # height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## # birth_city <chr>, birth_country <chr>, birth_state_province <chr>
# For reproducible outcome
set.seed(1234)
data_small <- data %>%
select(team_code, position_code, height_in_inches) %>%
sample_n(5)
data_small
## # A tibble: 5 × 3
## team_code position_code height_in_inches
## <chr> <chr> <dbl>
## 1 DAL C 72
## 2 DAL R 72
## 3 CHI D 68
## 4 PIT C 73
## 5 VAN G 66
Separating and Uniting
Separate a column
data_sep <- data_small %>%
separate(col = height_in_inches, into = c("A","B"), sep = 1)
data_sep
## # A tibble: 5 × 4
## team_code position_code A B
## <chr> <chr> <chr> <chr>
## 1 DAL C 7 2
## 2 DAL R 7 2
## 3 CHI D 6 8
## 4 PIT C 7 3
## 5 VAN G 6 6
Unite two columns
data_sep %>%
unite(col = "height_in_inches", c(A,B), sep = "", )
## # A tibble: 5 × 3
## team_code position_code height_in_inches
## <chr> <chr> <chr>
## 1 DAL C 72
## 2 DAL R 72
## 3 CHI D 68
## 4 PIT C 73
## 5 VAN G 66
Missing Values