1. Purpose.

The purpose of this notebook is to illustrate how the stringr package can be used to perform basic string manipulation tasks.

2. Load libraries and view practice dataset.

library(tidyverse)
# Keep the practice data small: only the first ten entries of `fruit`.
# NOTE(review): `fruit` is not created in this notebook; presumably it is the
# example vector that ships with stringr (attached via tidyverse) -- confirm.
fruit <- fruit[seq_len(10)]
fruit
##  [1] "apple"        "apricot"      "avocado"      "banana"      
##  [5] "bell pepper"  "bilberry"     "blackberry"   "blackcurrant"
##  [9] "blood orange" "blueberry"

3. Detect string matches.

# Logical vector: does each element contain "ap"?
str_detect(fruit, "ap")
##  [1]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Positions of the elements that contain "ap".
str_which(fruit, "ap")
## [1] 1 2
# Number of times "ap" occurs in each element.
str_count(fruit, "ap")
##  [1] 1 1 0 0 0 0 0 0 0 0
# Start/end character positions of the match in each element (NA = no match).
str_locate(fruit, "ap")
##       start end
##  [1,]     1   2
##  [2,]     1   2
##  [3,]    NA  NA
##  [4,]    NA  NA
##  [5,]    NA  NA
##  [6,]    NA  NA
##  [7,]    NA  NA
##  [8,]    NA  NA
##  [9,]    NA  NA
## [10,]    NA  NA
# When a string holds several matches, str_locate() reports only the first ...
str_locate("fjg apddg apd", pattern = "ap")
##      start end
## [1,]     5   6
# ... while str_locate_all() returns a list with every match per input string.
str_locate_all("fjg apddg apd", pattern = "ap")
## [[1]]
##      start end
## [1,]     5   6
## [2,]    11  12

4. Subset strings.

# Characters 1 through 3 of each element.
str_sub(fruit, start = 1, end = 3)
##  [1] "app" "apr" "avo" "ban" "bel" "bil" "bla" "bla" "blo" "blu"
# Keep only the elements that contain "ap" -- handy for filtering a vector.
str_subset(fruit, pattern = "ap")
## [1] "apple"   "apricot"
# The matched text itself, or NA where the element has no match.
str_extract(fruit, pattern = "ap")
##  [1] "ap" "ap" NA   NA   NA   NA   NA   NA   NA   NA
# The same matches, returned as a character matrix with one row per element.
str_match(fruit, pattern = "ap")
##       [,1]
##  [1,] "ap"
##  [2,] "ap"
##  [3,] NA  
##  [4,] NA  
##  [5,] NA  
##  [6,] NA  
##  [7,] NA  
##  [8,] NA  
##  [9,] NA  
## [10,] NA
# str_match() reports only the first match in a string ...
str_match("fjg apddg apd", pattern = "ap")
##      [,1]
## [1,] "ap"
# ... while str_match_all() returns a list holding every match.
str_match_all("fjg apddg apd", pattern = "ap")
## [[1]]
##      [,1]
## [1,] "ap"
## [2,] "ap"

5. Manage string lengths.

# Number of characters in each element.
str_length(fruit)
##  [1]  5  7  7  6 11  8 10 12 12  9
# Pad with spaces on the right so that every string is 17 characters wide.
str_pad(fruit, width = 17, side = "right")
##  [1] "apple            " "apricot          " "avocado          "
##  [4] "banana           " "bell pepper      " "bilberry         "
##  [7] "blackberry       " "blackcurrant     " "blood orange     "
## [10] "blueberry        "
# Shorten strings longer than 8 characters; the result ends in "..." and is
# itself 8 characters wide.
str_trunc(fruit, width = 8)
##  [1] "apple"    "apricot"  "avocado"  "banana"   "bell ..." "bilberry"
##  [7] "black..." "black..." "blood..." "blueb..."
# Remove the whitespace at both ends of the string.
str_trim(" hello   ")
## [1] "hello"

6. Adjust string values.

# Overwrite characters 1-3 of every element with "123". This is an assignment:
# it changes `fruit` itself, so all output from here on shows the new values.
str_sub(fruit, start = 1, end = 3) <- "123"
# Replace only the first "a" found in each element.
str_replace(fruit, pattern = "a", replacement = "aaa")
##  [1] "123le"          "123icot"        "123caaado"      "123aaana"      
##  [5] "123l pepper"    "123berry"       "123ckberry"     "123ckcurraaant"
##  [9] "123od oraaange" "123eberry"
# Replace every "a" found in each element.
str_replace_all(fruit, pattern = "a", replacement = "aaa")
##  [1] "123le"          "123icot"        "123caaado"      "123aaanaaa"    
##  [5] "123l pepper"    "123berry"       "123ckberry"     "123ckcurrant"
##  [9] "123od oraaange" "123eberry"
# Case conversion: lower case, upper case, and title case.
str_to_lower("HeLLO")
## [1] "hello"
str_to_upper(fruit)
##  [1] "123LE"        "123ICOT"      "123CADO"      "123ANA"      
##  [5] "123L PEPPER"  "123BERRY"     "123CKBERRY"   "123CKCURRANT"
##  [9] "123OD ORANGE" "123EBERRY"
str_to_title(fruit)
##  [1] "123Le"        "123Icot"      "123Cado"      "123Ana"      
##  [5] "123L Pepper"  "123Berry"     "123Ckberry"   "123Ckcurrant"
##  [9] "123Od Orange" "123Eberry"
# Split a camelCase string into words, joined by the separator in `sep_out`.
# The snakecase package is called via `::`, so it is not attached here.
snakecase::to_snake_case("thisText", sep_out = " ")
## [1] "this text"
snakecase::to_snake_case("thisText", sep_out = "_")
## [1] "this_text"

7. Join & split strings.

# Join the arguments into one string; numbers are converted to text.
str_c(4, 6)
## [1] "46"
# `sep` is inserted between the joined pieces.
str_c("cat", "dog", sep = "_")
## [1] "cat_dog"
# Repeat each element twice.
str_dup(fruit, times = 2)
##  [1] "123le123le"               "123icot123icot"          
##  [3] "123cado123cado"           "123ana123ana"            
##  [5] "123l pepper123l pepper"   "123berry123berry"        
##  [7] "123ckberry123ckberry"     "123ckcurrant123ckcurrant"
##  [9] "123od orange123od orange" "123eberry123eberry"
# Split on spaces into a character matrix with exactly 3 columns; unused
# columns are filled with "".
str_split_fixed(fruit, " ", n = 3)
##       [,1]           [,2]     [,3]
##  [1,] "123le"        ""       ""  
##  [2,] "123icot"      ""       ""  
##  [3,] "123cado"      ""       ""  
##  [4,] "123ana"       ""       ""  
##  [5,] "123l"         "pepper" ""  
##  [6,] "123berry"     ""       ""  
##  [7,] "123ckberry"   ""       ""  
##  [8,] "123ckcurrant" ""       ""  
##  [9,] "123od"        "orange" ""  
## [10,] "123eberry"    ""       ""
# Split on spaces into a list: one character vector of pieces per element.
str_split(fruit, " ")
## [[1]]
## [1] "123le"
## 
## [[2]]
## [1] "123icot"
## 
## [[3]]
## [1] "123cado"
## 
## [[4]]
## [1] "123ana"
## 
## [[5]]
## [1] "123l"   "pepper"
## 
## [[6]]
## [1] "123berry"
## 
## [[7]]
## [1] "123ckberry"
## 
## [[8]]
## [1] "123ckcurrant"
## 
## [[9]]
## [1] "123od"  "orange"
## 
## [[10]]
## [1] "123eberry"
# The single strings are reused for every element of `fruit`, giving one
# joined string per element.
str_c("Pi is ", fruit, ". 123")
##  [1] "Pi is 123le. 123"        "Pi is 123icot. 123"     
##  [3] "Pi is 123cado. 123"      "Pi is 123ana. 123"      
##  [5] "Pi is 123l pepper. 123"  "Pi is 123berry. 123"    
##  [7] "Pi is 123ckberry. 123"   "Pi is 123ckcurrant. 123"
##  [9] "Pi is 123od orange. 123" "Pi is 123eberry. 123"

8. Order strings.

# NOTE(review): `x` is not used anywhere in the visible part of this notebook.
# str_c() concatenates its arguments, so `x` is the single string "tcg"; if a
# three-element vector was intended for the ordering examples, use
# c("t", "c", "g") instead -- confirm before changing.
x <- str_c("t", "c", "g")
# Integer permutation that would put `fruit` into ascending order.
str_order(fruit)
##  [1]  4  6  3  7  8 10  2  5  1  9
# Sorted values, largest first. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned, whereas `TRUE` is a reserved word.
str_sort(fruit, decreasing = TRUE)
##  [1] "123od orange" "123le"        "123l pepper"  "123icot"     
##  [5] "123eberry"    "123ckcurrant" "123ckberry"   "123cado"     
##  [9] "123berry"     "123ana"