Import your data

# Read the bee-colony workbook from its relative path.
bee_colonies <- read_excel("../00_data/MyData3.xlsx")

# Fix the RNG seed so the same 10 rows are drawn on every run.
set.seed(123)
# Keep only the five columns used in the examples, then draw 10 random rows.
bee_colonies_small <- sample_n(
    select(bee_colonies, year, months, state, colony_size, colony_lost),
    10
)

Chapter 14

Tools

Detect matches

# Count the sampled rows whose colony_size begins with the digit 2.
# str_detect() yields one logical per row; sum() counts the TRUE values.
# The summary expression is unnamed, so dplyr labels the result column with
# the expression text itself.
summarise(bee_colonies_small, sum(str_detect(colony_size, "^2")))
## # A tibble: 1 × 1
##   `sum(str_detect(colony_size, "^2"))`
##                                  <int>
## 1                                    3
# Per-row flag: does colony_lost start with the digit 2?
bee_colonies_small %>% pull(colony_lost) %>% str_detect("^2")
##  [1]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
# Number of rows that match (each TRUE counts as 1).
bee_colonies_small %>% pull(colony_lost) %>% str_detect("^2") %>% sum()
## [1] 3
# Share of rows that match (mean of a logical vector).
bee_colonies_small %>% pull(colony_lost) %>% str_detect("^2") %>% mean()
## [1] 0.3

Extract matches

# State names to search for.
states <- c("Vermont", "Texas", "California", "Florida", "Utah")
# Join the names with "|" to form a single "any of these" regular expression.
state_match <- states %>% str_c(collapse = "|")
state_match
## [1] "Vermont|Texas|California|Florida|Utah"
# Keep the elements that contain at least one of the names.
# NOTE(review): the pattern is matched against `states`, the same vector it
# was built from, so every element is retained.
pattern_matching_states <- states %>% str_subset(state_match)
# Pull out the first matching name from each retained element.
matches <- pattern_matching_states %>% str_extract(state_match)
matches %>% head()
## [1] "Vermont"    "Texas"      "California" "Florida"    "Utah"

Replacing matches

# Replace the leading digit of colony_lost with "-" and store the result in a
# new, explicitly named column.
# The pattern uses the character class [1-9] (one non-zero digit). The earlier
# spelling "[1-9999]" is the same class with a redundant repeated 9; it is not
# a numeric range from 1 to 9999, so only the first digit is ever replaced.
# colony_lost is numeric (<dbl>), so str_replace() works on its character
# form and the new column is <chr>.
bee_colonies_small %>%
    mutate(colony_lost_masked = str_replace(colony_lost, "^[1-9]", "-"))
## # A tibble: 10 × 6
##     year months           state      colony_size colony_lost colony_lost_masked
##    <dbl> <chr>            <chr>            <dbl>       <dbl> <chr>             
##  1  2017 January-March    Utah             16000        2700 -700              
##  2  2017 April-June       Vermont           6000         170 -70               
##  3  2015 October-December Texas            125000       25000 -5000             
##  4  2017 October-December Hawaii            15000         130 -30               
##  5  2016 January-March    Florida         245000       45000 -5000             
##  6  2019 October-December Wyoming          27000        3300 -300              
##  7  2021 January-March    Kansas            5000        1400 -400              
##  8  2020 July-September   California      640000       69000 -9000             
##  9  2018 July-September   Florida         197000       30000 -0000             
## 10  2018 January-March    Texas            205000       22000 -2000             

Splitting

# Example period labels in "start: end" form, used to demonstrate splitting.
months <- c("January: March", "April: June", "October: December")
# Split each label on ": " into at most two pieces (n = 2).
# simplify = TRUE returns a character matrix with one row per label instead
# of a list of character vectors.
months %>% str_split(": ", n = 2, simplify = TRUE)
##      [,1]      [,2]      
## [1,] "January" "March"   
## [2,] "April"   "June"    
## [3,] "October" "December"