Import your data

# Load the full workbook (path is relative to this document's folder).
colony <- read_excel("../00_data/myData.xlsx")

# Fix the RNG seed so the 10-row sample below is the same on every run.
set.seed(123)

# Draw 10 random rows and keep only the four columns used in the examples.
# NOTE(review): the printed result shows colony_added as <chr> while
# colony_lost is <dbl> -- presumably a non-numeric entry in the spreadsheet;
# confirm before doing arithmetic on it.
colony_small <- select(
    sample_n(colony, 10),
    year, state, colony_lost, colony_added
)

colony_small
## # A tibble: 10 × 4
##     year state      colony_lost colony_added
##    <dbl> <chr>            <dbl> <chr>       
##  1  2017 Utah              2700 2900        
##  2  2017 Vermont            170 390         
##  3  2015 Texas            25000 13000       
##  4  2017 Hawaii             130 970         
##  5  2016 Florida          45000 36000       
##  6  2019 Wyoming           3300 100         
##  7  2021 Kansas            1400 2300        
##  8  2020 California       69000 61000       
##  9  2018 Florida          30000 53000       
## 10  2018 Texas            22000 118000

Chapter 14

Tools

Detect matches

# Count the sampled rows whose year ends in "7".
# `year` is numeric, so it is converted to text explicitly before the regex is
# applied, and the summary column is named so the printed header is not the
# whole expression.
colony_small %>%
    summarise(years_ending_in_7 = sum(str_detect(as.character(year), "7$")))
## # A tibble: 1 × 1
##   years_ending_in_7
##               <int>
## 1                 3
# The same check outside the pipeline: one TRUE/FALSE per row of colony_small.
str_detect(as.character(colony_small$year), "7$")
##  [1]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# Summing a logical vector counts its TRUE values.
sum(str_detect(as.character(colony_small$year), "7$"))
## [1] 3

Extract matches

# State names to search for (hand-typed; some names appear more than once).
states <- c(
    "Texas", "Florida", "California", "Kansas", "Wyoming",
    "Florida", "Hawaii", "Texas", "Vermont", "Utah"
)
# Collapse the names into one regex alternation ("A|B|C"). unique() drops the
# repeated names, which add nothing to an alternation.
state_match <- str_c(unique(states), collapse = "|")
state_match
## [1] "Texas|Florida|California|Kansas|Wyoming|Hawaii|Vermont|Utah"
# Extract with the pattern built above; each element yields the state name it
# contains, or NA when none of the alternatives match.
str_extract(states, state_match)
##  [1] "Texas"      "Florida"    "California" "Kansas"     "Wyoming"   
##  [6] "Florida"    "Hawaii"     "Texas"      "Vermont"    "Utah"

Replacing matches

# Replace every "0" in colony_lost with "-".
# colony_lost is numeric, so it is converted to text explicitly, and the new
# column gets a real name instead of the auto-generated expression label.
colony_small %>%
    mutate(
        lost_dashed = str_replace_all(as.character(colony_lost), "0", "-")
    )
## # A tibble: 10 × 5
##     year state      colony_lost colony_added lost_dashed
##    <dbl> <chr>            <dbl> <chr>        <chr>      
##  1  2017 Utah              2700 2900         27--       
##  2  2017 Vermont            170 390          17-        
##  3  2015 Texas            25000 13000        25---      
##  4  2017 Hawaii             130 970          13-        
##  5  2016 Florida          45000 36000        45---      
##  6  2019 Wyoming           3300 100          33--       
##  7  2021 Kansas            1400 2300         14--       
##  8  2020 California       69000 61000        69---      
##  9  2018 Florida          30000 53000        3----      
## 10  2018 Texas            22000 118000       22---      