# Read the NHL roster CSV published for the 2024-01-09 TidyTuesday release
# directly from GitHub into a tibble.
nhl_rosters <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/nhl_rosters.csv"
)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date (1): birth_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the same 100 players are drawn on every run.
set.seed(1234)

# Keep only the identifying columns, then draw a random sample of 100 rows.
nhl_rs <- nhl_rosters %>%
  select(team_code, first_name, last_name, birth_city, birth_country) %>%
  sample_n(size = 100)

# Print the sampled roster.
nhl_rs
## # A tibble: 100 × 5
## team_code first_name last_name birth_city birth_country
## <chr> <chr> <chr> <chr> <chr>
## 1 DAL Cody Eakin Winnipeg CAN
## 2 DAL Ales Hemsky Pardubice CZE
## 3 CHI Marty Burke Toronto CAN
## 4 PIT Harry York Ponoka CAN
## 5 VAN Richard Brodeur Longueuil CAN
## 6 COL Patrick Bordeleau Montréal CAN
## 7 OTT Brady Tkachuk Scottsdale USA
## 8 PHI Ross Lonsberry Humboldt CAN
## 9 PIT Peter Skudra Riga LVA
## 10 TOR Ted Kennedy Humberstone CAN
## # ℹ 90 more rows
# Show every first name in the sample as a plain character vector.
nhl_rs[["first_name"]]
## [1] "Cody" "Ales" "Marty" "Harry" "Richard" "Patrick"
## [7] "Brady" "Ross" "Peter" "Ted" "John" "Jon"
## [13] "Pat" "Dick" "Kevin" "Jari" "Andrei" "Pascal"
## [19] "Lee" "Radek" "Drew" "Dan" "Pat" "Bob"
## [25] "Andrew" "Andy" "Nick" "Doug" "Murray" "Marcel"
## [31] "George" "Randy" "Maxim" "Kirk" "Tony" "Michael"
## [37] "John" "Paul" "Jack" "David" "Larry" "Keith"
## [43] "Jaroslav" "Jason" "Tony" "Karel" "Dale" "Stephane"
## [49] "Gary" "Blair" "Valeri" "Brian" "Lars" "Matt"
## [55] "Daniel" "Craig" "Bryan" "Frantisek" "Trevor" "Dan"
## [61] "Riley" "Valeri" "Jason" "Ron" "Darcy" "Jiri"
## [67] "Greg" "Brandon" "Greg" "Shayne" "Brayden" "Chuck"
## [73] "Craig" "Craig" "Brian" "Jacques" "John" "Kent-Erik"
## [79] "Todd" "Dan" "Martin" "Jack" "Ted" "Roberto"
## [85] "Kari" "David" "Kerby" "Justin" "Mike" "Pat"
## [91] "Terry" "Tyson" "Oleg" "Nicholas" "Byron" "Jason"
## [97] "Jamie" "Brenden" "Wayne" "Sidney"
# str_detect(nhl_rs$first_name, "Dan") yields one TRUE/FALSE per name;
# summing that logical vector counts the names containing "Dan".
# This is a substring match, so "Daniel" is counted as well.
nhl_rs$first_name %>%
  str_detect("Dan") %>%
  sum()
## [1] 4
# Same count as above, returned as a one-row tibble with column num_Dan.
summarise(
  nhl_rs,
  num_Dan = sum(str_detect(first_name, "Dan"))
)
## # A tibble: 1 × 1
## num_Dan
## <int>
## 1 4
# Count the sampled players whose birth_country contains "CAN".
nhl_rs$birth_country %>%
  str_detect("CAN") %>%
  sum()
## [1] 70
# Logical mask over the sample: TRUE where first_name contains "Dan" AND
# birth_country contains "CAN". `&` is used (not `&&`) because the test is
# elementwise across the whole vector.
canadian_Dans <- str_detect(nhl_rs$first_name, "Dan") &
  str_detect(nhl_rs$birth_country, "CAN")

# Print the mask.
canadian_Dans
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE
# Count rows matching both patterns and return the total as a one-row tibble.
summarise(
  nhl_rs,
  canadian_Dans = sum(
    str_detect(birth_country, "CAN") & str_detect(first_name, "Dan")
  )
)
## # A tibble: 1 × 1
## canadian_Dans
## <int>
## 1 4
# Extract the Dans: pull the matched "Dan" text out of first_name
# str_extract() returns the matched text ("Dan") or NA when there is no match,
# so dropping the NA rows leaves only the players whose name contains "Dan".
nhl_rs %>%
  mutate(col_Dan = str_extract(first_name, "Dan")) %>%
  filter(!is.na(col_Dan)) %>%
  select(first_name, last_name, col_Dan)
## # A tibble: 4 × 3
## first_name last_name col_Dan
## <chr> <chr> <chr>
## 1 Dan Bonar Dan
## 2 Daniel Marois Dan
## 3 Dan Maloney Dan
## 4 Dan Boyle Dan
# Build col_HanSolo from first_name with "Cody" swapped for "HanSolo";
# names without "Cody" pass through unchanged. Rows with a missing
# col_HanSolo are dropped.
nhl_rs %>%
  mutate(col_HanSolo = str_replace(first_name, "Cody", "HanSolo")) %>%
  filter(!is.na(col_HanSolo)) %>%
  select(col_HanSolo, last_name)
## # A tibble: 100 × 2
## col_HanSolo last_name
## <chr> <chr>
## 1 HanSolo Eakin
## 2 Ales Hemsky
## 3 Marty Burke
## 4 Harry York
## 5 Richard Brodeur
## 6 Patrick Bordeleau
## 7 Brady Tkachuk
## 8 Ross Lonsberry
## 9 Peter Skudra
## 10 Ted Kennedy
## # ℹ 90 more rows
# Copy of the sample with "CAN" in birth_country rewritten to "Corellia".
nhl_hansolo <- mutate(
  nhl_rs,
  birth_country = str_replace(birth_country, "CAN", "Corellia")
)

# Print the relabelled roster.
nhl_hansolo
## # A tibble: 100 × 5
## team_code first_name last_name birth_city birth_country
## <chr> <chr> <chr> <chr> <chr>
## 1 DAL Cody Eakin Winnipeg Corellia
## 2 DAL Ales Hemsky Pardubice CZE
## 3 CHI Marty Burke Toronto Corellia
## 4 PIT Harry York Ponoka Corellia
## 5 VAN Richard Brodeur Longueuil Corellia
## 6 COL Patrick Bordeleau Montréal Corellia
## 7 OTT Brady Tkachuk Scottsdale USA
## 8 PHI Ross Lonsberry Humboldt Corellia
## 9 PIT Peter Skudra Riga LVA
## 10 TOR Ted Kennedy Humberstone Corellia
## # ℹ 90 more rows
# Rename every player born in "Corellia" to Han Solo; all other rows keep
# their original names. Both tests read birth_country, which is not modified
# here, so the two replacements are independent of each other.
mutate(
  nhl_hansolo,
  first_name = if_else(
    condition = birth_country == "Corellia",
    true = "Han",
    false = first_name
  ),
  last_name = if_else(
    condition = birth_country == "Corellia",
    true = "Solo",
    false = last_name
  )
)
## # A tibble: 100 × 5
## team_code first_name last_name birth_city birth_country
## <chr> <chr> <chr> <chr> <chr>
## 1 DAL Han Solo Winnipeg Corellia
## 2 DAL Ales Hemsky Pardubice CZE
## 3 CHI Han Solo Toronto Corellia
## 4 PIT Han Solo Ponoka Corellia
## 5 VAN Han Solo Longueuil Corellia
## 6 COL Han Solo Montréal Corellia
## 7 OTT Brady Tkachuk Scottsdale USA
## 8 PHI Han Solo Humboldt Corellia
## 9 PIT Peter Skudra Riga LVA
## 10 TOR Han Solo Humberstone Corellia
## # ℹ 90 more rows