Import your data

# Read the "nhl_rosters" sheet of the workbook and keep only its first 50 rows,
# then print the result.
rosters <- head(
    read_excel("../00_data/myData.xlsx", sheet = "nhl_rosters"),
    50
)
rosters
## # A tibble: 50 × 18
##    team_code   season position_type player_id headshot      first_name last_name
##    <chr>        <dbl> <chr>             <dbl> <chr>         <chr>      <chr>    
##  1 ATL       19992000 forwards        8467867 https://asse… Bryan      Adams    
##  2 ATL       19992000 forwards        8445176 https://asse… Donald     Audette  
##  3 ATL       19992000 forwards        8460014 https://asse… Eric       Bertrand 
##  4 ATL       19992000 forwards        8460510 https://asse… Jason      Botterill
##  5 ATL       19992000 forwards        8459596 https://asse… Andrew     Brunette 
##  6 ATL       19992000 forwards        8445733 https://asse… Kelly      Buchberg…
##  7 ATL       19992000 forwards        8460573 https://asse… Hnat       Domenich…
##  8 ATL       19992000 forwards        8459450 https://asse… Shean      Donovan  
##  9 ATL       19992000 forwards        8446675 https://asse… Nelson     Emerson  
## 10 ATL       19992000 forwards        8446823 https://asse… Ray        Ferraro  
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## #   shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## #   height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## #   birth_city <chr>, birth_country <chr>, birth_state_province <chr>

Chapter 14

Tools

Detect matches

# Count the rows whose sweater_number ends in "1" ("1$" anchors the match at
# the end of the string). The summary column is left unnamed, so it is labelled
# with the expression itself.
summarise(rosters, sum(str_detect(sweater_number, "1$")))
## # A tibble: 1 × 1
##   `sum(str_detect(sweater_number, "1$"))`
##                                     <int>
## 1                                       7
# One logical value per row: TRUE where the sweater number ends in "1".
str_detect(rosters[["sweater_number"]], "1$")
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
## [13] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
## [49]  TRUE FALSE
# TRUE counts as 1, so summing the matches gives the number of matching rows.
rosters$sweater_number %>%
    str_detect("1$") %>%
    sum()
## [1] 7
# The mean of the logical matches is the proportion of rows that match.
rosters$sweater_number %>%
    str_detect("1$") %>%
    mean()
## [1] 0.14

Extract matches

# Colour names to search for.
colours <- c("red", "orange", "yellow", "green", "blue", "purple")
# Join the names with "|" so the resulting regex matches any one of them.
colour_match <- colours %>% str_c(collapse = "|")
colour_match
## [1] "red|orange|yellow|green|blue|purple"
# Keep only the sentences that match the colour pattern, then extract the
# first match from each one. The pattern has no word boundaries, so a colour
# name embedded in a longer word also counts as a match.
has_colour <- sentences %>% str_subset(colour_match)
has_colour %>% str_extract(colour_match)
##  [1] "blue"   "blue"   "red"    "red"    "red"    "blue"   "yellow" "red"   
##  [9] "red"    "green"  "red"    "red"    "blue"   "red"    "red"    "red"   
## [17] "red"    "blue"   "red"    "blue"   "red"    "green"  "red"    "red"   
## [25] "red"    "red"    "red"    "red"    "green"  "red"    "green"  "red"   
## [33] "purple" "green"  "red"    "red"    "red"    "red"    "red"    "blue"  
## [41] "red"    "blue"   "red"    "red"    "red"    "red"    "green"  "green" 
## [49] "green"  "red"    "red"    "yellow" "red"    "orange" "red"    "red"   
## [57] "red"

Replacing matches

# Replace only the first "4" in each player_id with "+"; the column comes back
# as character.
mutate(rosters, player_id = str_replace(player_id, "[4]", "+"))
## # A tibble: 50 × 18
##    team_code   season position_type player_id headshot      first_name last_name
##    <chr>        <dbl> <chr>         <chr>     <chr>         <chr>      <chr>    
##  1 ATL       19992000 forwards      8+67867   https://asse… Bryan      Adams    
##  2 ATL       19992000 forwards      8+45176   https://asse… Donald     Audette  
##  3 ATL       19992000 forwards      8+60014   https://asse… Eric       Bertrand 
##  4 ATL       19992000 forwards      8+60510   https://asse… Jason      Botterill
##  5 ATL       19992000 forwards      8+59596   https://asse… Andrew     Brunette 
##  6 ATL       19992000 forwards      8+45733   https://asse… Kelly      Buchberg…
##  7 ATL       19992000 forwards      8+60573   https://asse… Hnat       Domenich…
##  8 ATL       19992000 forwards      8+59450   https://asse… Shean      Donovan  
##  9 ATL       19992000 forwards      8+46675   https://asse… Nelson     Emerson  
## 10 ATL       19992000 forwards      8+46823   https://asse… Ray        Ferraro  
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## #   shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## #   height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## #   birth_city <chr>, birth_country <chr>, birth_state_province <chr>
# Replace every "4" in each player_id with "+"; the column comes back as
# character.
mutate(rosters, player_id = str_replace_all(player_id, "[4]", "+"))
## # A tibble: 50 × 18
##    team_code   season position_type player_id headshot      first_name last_name
##    <chr>        <dbl> <chr>         <chr>     <chr>         <chr>      <chr>    
##  1 ATL       19992000 forwards      8+67867   https://asse… Bryan      Adams    
##  2 ATL       19992000 forwards      8++5176   https://asse… Donald     Audette  
##  3 ATL       19992000 forwards      8+6001+   https://asse… Eric       Bertrand 
##  4 ATL       19992000 forwards      8+60510   https://asse… Jason      Botterill
##  5 ATL       19992000 forwards      8+59596   https://asse… Andrew     Brunette 
##  6 ATL       19992000 forwards      8++5733   https://asse… Kelly      Buchberg…
##  7 ATL       19992000 forwards      8+60573   https://asse… Hnat       Domenich…
##  8 ATL       19992000 forwards      8+59+50   https://asse… Shean      Donovan  
##  9 ATL       19992000 forwards      8++6675   https://asse… Nelson     Emerson  
## 10 ATL       19992000 forwards      8++6823   https://asse… Ray        Ferraro  
## # ℹ 40 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## #   shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## #   height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## #   birth_city <chr>, birth_country <chr>, birth_state_province <chr>