# Read the "nhl_rosters" sheet of the workbook, keep only its first 50 rows,
# and print the resulting tibble.
rosters <- head(
  read_excel("../00_data/myData.xlsx", sheet = "nhl_rosters"),
  50
)
rosters
## # A tibble: 50 × 18
## team_code season position_type player_id headshot first_name last_name
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 ATL 19992000 forwards 8467867 https://asse… Bryan Adams
## 2 ATL 19992000 forwards 8445176 https://asse… Donald Audette
## 3 ATL 19992000 forwards 8460014 https://asse… Eric Bertrand
## 4 ATL 19992000 forwards 8460510 https://asse… Jason Botterill
## 5 ATL 19992000 forwards 8459596 https://asse… Andrew Brunette
## 6 ATL 19992000 forwards 8445733 https://asse… Kelly Buchberg…
## 7 ATL 19992000 forwards 8460573 https://asse… Hnat Domenich…
## 8 ATL 19992000 forwards 8459450 https://asse… Shean Donovan
## 9 ATL 19992000 forwards 8446675 https://asse… Nelson Emerson
## 10 ATL 19992000 forwards 8446823 https://asse… Ray Ferraro
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## # shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## # height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## # birth_city <chr>, birth_country <chr>, birth_state_province <chr>
# Count the rows whose sweater number ends in "1" ("1$" anchors the match to
# the end of the string). The summary column is deliberately left unnamed, so
# it is labelled with the expression itself.
summarise(rosters, sum(str_detect(sweater_number, "1$")))
## # A tibble: 1 × 1
## `sum(str_detect(sweater_number, "1$"))`
## <int>
## 1 7
# Logical vector, one element per row: TRUE where the sweater number ends in "1".
rosters$sweater_number %>% str_detect("1$")
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [13] FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [49] TRUE FALSE
# TRUE counts as 1 when summed, so this is the number of sweater numbers
# ending in "1".
rosters$sweater_number %>%
  str_detect("1$") %>%
  sum()
## [1] 7
# The mean of a logical vector is the proportion of TRUE values, i.e. the
# share of sweater numbers ending in "1".
rosters$sweater_number %>%
  str_detect("1$") %>%
  mean()
## [1] 0.14
# Colour names to search for.
colours <- c("red", "orange", "yellow", "green", "blue", "purple")
# Join the names with "|" to form a single regex that matches any one of them.
colour_match <- colours %>% str_c(collapse = "|")
colour_match
## [1] "red|orange|yellow|green|blue|purple"
# Keep only the sentences that match the colour pattern, then pull out the
# first colour matched in each of them.
# `sentences` is not defined in this part of the file -- presumably the
# example vector shipped with stringr; TODO confirm.
# NOTE(review): the pattern has no word boundaries, so a colour name embedded
# in a longer word also counts as a match.
has_colour <- sentences %>% str_subset(colour_match)
has_colour %>% str_extract(colour_match)
## [1] "blue" "blue" "red" "red" "red" "blue" "yellow" "red"
## [9] "red" "green" "red" "red" "blue" "red" "red" "red"
## [17] "red" "blue" "red" "blue" "red" "green" "red" "red"
## [25] "red" "red" "red" "red" "green" "red" "green" "red"
## [33] "purple" "green" "red" "red" "red" "red" "red" "blue"
## [41] "red" "blue" "red" "red" "red" "red" "green" "green"
## [49] "green" "red" "red" "yellow" "red" "orange" "red" "red"
## [57] "red"
# Replace only the first "4" in each player_id with "+". player_id is numeric
# in `rosters`; the replaced column comes back as character.
mutate(rosters, player_id = str_replace(player_id, "[4]", "+"))
## # A tibble: 50 × 18
## team_code season position_type player_id headshot first_name last_name
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 ATL 19992000 forwards 8+67867 https://asse… Bryan Adams
## 2 ATL 19992000 forwards 8+45176 https://asse… Donald Audette
## 3 ATL 19992000 forwards 8+60014 https://asse… Eric Bertrand
## 4 ATL 19992000 forwards 8+60510 https://asse… Jason Botterill
## 5 ATL 19992000 forwards 8+59596 https://asse… Andrew Brunette
## 6 ATL 19992000 forwards 8+45733 https://asse… Kelly Buchberg…
## 7 ATL 19992000 forwards 8+60573 https://asse… Hnat Domenich…
## 8 ATL 19992000 forwards 8+59450 https://asse… Shean Donovan
## 9 ATL 19992000 forwards 8+46675 https://asse… Nelson Emerson
## 10 ATL 19992000 forwards 8+46823 https://asse… Ray Ferraro
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## # shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## # height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## # birth_city <chr>, birth_country <chr>, birth_state_province <chr>
# Replace every "4" in each player_id with "+". player_id is numeric in
# `rosters`; the replaced column comes back as character.
mutate(rosters, player_id = str_replace_all(player_id, "[4]", "+"))
## # A tibble: 50 × 18
## team_code season position_type player_id headshot first_name last_name
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 ATL 19992000 forwards 8+67867 https://asse… Bryan Adams
## 2 ATL 19992000 forwards 8++5176 https://asse… Donald Audette
## 3 ATL 19992000 forwards 8+6001+ https://asse… Eric Bertrand
## 4 ATL 19992000 forwards 8+60510 https://asse… Jason Botterill
## 5 ATL 19992000 forwards 8+59596 https://asse… Andrew Brunette
## 6 ATL 19992000 forwards 8++5733 https://asse… Kelly Buchberg…
## 7 ATL 19992000 forwards 8+60573 https://asse… Hnat Domenich…
## 8 ATL 19992000 forwards 8+59+50 https://asse… Shean Donovan
## 9 ATL 19992000 forwards 8++6675 https://asse… Nelson Emerson
## 10 ATL 19992000 forwards 8++6823 https://asse… Ray Ferraro
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## # shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## # height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## # birth_city <chr>, birth_country <chr>, birth_state_province <chr>