Import data
# Read the NHL workbook; the path is relative to the current working directory.
data <- read_excel("../00_data/NHLDATA.xlsx")

# Keep only the character columns and the first 50 rows, then print the result.
data_small <- head(select(data, where(is.character)), n = 50)
data_small
## # A tibble: 50 × 5
## first_name last_name birth_city birth_country birth_state_province
## <chr> <chr> <chr> <chr> <chr>
## 1 Bryan Adams Fort St. James CAN British Columbia
## 2 Donald Audette Laval CAN Quebec
## 3 Eric Bertrand St-Ephrem CAN Quebec
## 4 Jason Botterill Edmonton CAN Alberta
## 5 Andrew Brunette Sudbury CAN Ontario
## 6 Kelly Buchberger Langenburg CAN Saskatchewan
## 7 Hnat Domenichelli Edmonton CAN Alberta
## 8 Shean Donovan Timmins CAN Ontario
## 9 Nelson Emerson Hamilton CAN Ontario
## 10 Ray Ferraro Trail CAN British Columbia
## # ℹ 40 more rows
Detect matches
# Keep the rows whose first_name contains an uppercase "A".
# The pattern is case-sensitive and unanchored, so it matches anywhere in the name.
filter(data_small, str_detect(first_name, "A"))
## # A tibble: 2 × 5
## first_name last_name birth_city birth_country birth_state_province
## <chr> <chr> <chr> <chr> <chr>
## 1 Andrew Brunette Sudbury CAN Ontario
## 2 Andreas Karlsson Ludvika SWE NA
# Count how many first names end with "n".
# The summary column is named explicitly; an unnamed expression would become
# the column name itself and would need backticks to be referenced later.
data_small %>%
  summarise(n_ending_in_n = sum(str_detect(first_name, "n$")))
## # A tibble: 1 × 1
## n_ending_in_n
## <int>
## 1 12
# One TRUE/FALSE per row: does the first name end with "n"?
data_small %>%
  pull(first_name) %>%
  str_detect("n$")
## [1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE
## [13] FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [25] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE TRUE
# Number of first names ending with "n" (each TRUE counts as 1)
data_small$first_name %>%
  str_detect("n$") %>%
  sum()
## [1] 12
# Share of first names ending with "n" (mean of the logical vector)
data_small$first_name %>%
  str_detect("n$") %>%
  mean()
## [1] 0.24
Replacing matches
# Replace the first letter of first_name with "-".
# `\\p{L}` matches any Unicode letter, so a name that starts with an accented
# or lowercase letter is handled too; the ASCII-only class `[A-Z]` would leave
# such names unchanged.
data_small %>%
  mutate(first_name_rev = str_replace(first_name, "^\\p{L}", "-"))
## # A tibble: 50 × 6
## first_name last_name birth_city birth_country birth_state_province
## <chr> <chr> <chr> <chr> <chr>
## 1 Bryan Adams Fort St. James CAN British Columbia
## 2 Donald Audette Laval CAN Quebec
## 3 Eric Bertrand St-Ephrem CAN Quebec
## 4 Jason Botterill Edmonton CAN Alberta
## 5 Andrew Brunette Sudbury CAN Ontario
## 6 Kelly Buchberger Langenburg CAN Saskatchewan
## 7 Hnat Domenichelli Edmonton CAN Alberta
## 8 Shean Donovan Timmins CAN Ontario
## 9 Nelson Emerson Hamilton CAN Ontario
## 10 Ray Ferraro Trail CAN British Columbia
## # ℹ 40 more rows
## # ℹ 1 more variable: first_name_rev <chr>
# Replace all uppercase letters in first_name with "-".
# `\\p{Lu}` matches every Unicode uppercase letter, not only ASCII A-Z, so
# accented capitals are replaced as well.
data_small %>%
  mutate(first_name_rev = str_replace_all(first_name, "\\p{Lu}", "-"))
## # A tibble: 50 × 6
## first_name last_name birth_city birth_country birth_state_province
## <chr> <chr> <chr> <chr> <chr>
## 1 Bryan Adams Fort St. James CAN British Columbia
## 2 Donald Audette Laval CAN Quebec
## 3 Eric Bertrand St-Ephrem CAN Quebec
## 4 Jason Botterill Edmonton CAN Alberta
## 5 Andrew Brunette Sudbury CAN Ontario
## 6 Kelly Buchberger Langenburg CAN Saskatchewan
## 7 Hnat Domenichelli Edmonton CAN Alberta
## 8 Shean Donovan Timmins CAN Ontario
## 9 Nelson Emerson Hamilton CAN Ontario
## 10 Ray Ferraro Trail CAN British Columbia
## # ℹ 40 more rows
## # ℹ 1 more variable: first_name_rev <chr>