stringr training
The purpose of this notebook is to illustrate how the stringr package can be used to perform basic string manipulation tasks.
library(tidyverse)
# Setup ----
# Keep only the first ten entries of the `fruit` vector so the printed output
# of every example below stays short. This creates a global `fruit` variable
# that is used by all subsequent examples.
fruit <- fruit[1:10]
fruit
## [1] "apple" "apricot" "avocado" "banana"
## [5] "bell pepper" "bilberry" "blackberry" "blackcurrant"
## [9] "blood orange" "blueberry"
# Detect matches ----
# str_detect(): one TRUE/FALSE per element, telling whether the pattern occurs.
str_detect(fruit, pattern = "ap")
## [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# str_which(): the positions (indices) of the elements that contain the pattern.
str_which(fruit, pattern = "ap")
## [1] 1 2
# str_count(): how many times the pattern occurs in each element.
str_count(fruit, pattern = "ap")
## [1] 1 1 0 0 0 0 0 0 0 0
# str_locate(): a start/end matrix with one row per element; rows are NA when
# the element has no match.
str_locate(fruit, pattern = "ap")
## start end
## [1,] 1 2
## [2,] 1 2
## [3,] NA NA
## [4,] NA NA
## [5,] NA NA
## [6,] NA NA
## [7,] NA NA
## [8,] NA NA
## [9,] NA NA
## [10,] NA NA
# str_locate() reports only the first match in a string ...
str_locate("fjg apddg apd", "ap")
## start end
## [1,] 5 6
# ... whereas str_locate_all() returns a list holding every match per string.
str_locate_all("fjg apddg apd", "ap")
## [[1]]
## start end
## [1,] 5 6
## [2,] 11 12
# Subset and extract ----
# str_sub(): extract the characters at positions 1 to 3 of each element.
str_sub(fruit, 1, 3)
## [1] "app" "apr" "avo" "ban" "bel" "bil" "bla" "bla" "blo" "blu"
# str_subset(): keep only the elements that contain the pattern.
str_subset(fruit, "ap") #USEFUL
## [1] "apple" "apricot"
# str_extract(): the matched text itself for each element, NA where there is
# no match.
str_extract(fruit, "ap")
## [1] "ap" "ap" NA NA NA NA NA NA NA NA
# str_match(): like str_extract(), but the result is a character matrix with
# one row per element.
str_match(fruit, "ap")
## [,1]
## [1,] "ap"
## [2,] "ap"
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
## [7,] NA
## [8,] NA
## [9,] NA
## [10,] NA
# str_match() reports only the first match in a string ...
str_match("fjg apddg apd", "ap")
## [,1]
## [1,] "ap"
# ... whereas str_match_all() returns a list with one row per match.
str_match_all("fjg apddg apd", "ap")
## [[1]]
## [,1]
## [1,] "ap"
## [2,] "ap"
# Manage lengths ----
# str_length(): number of characters in each element.
str_length(fruit)
## [1] 5 7 7 6 11 8 10 12 12 9
# str_pad(): pad each element on the right up to a width of 17.
# NOTE(review): the captured output below appears to show the padding
# collapsed to a single trailing space - confirm against a live session.
str_pad(fruit, 17, side = "right")
## [1] "apple " "apricot " "avocado "
## [4] "banana " "bell pepper " "bilberry "
## [7] "blackberry " "blackcurrant " "blood orange "
## [10] "blueberry "
# str_trunc(): shorten elements longer than 8 characters; the trailing "..."
# counts towards the width of 8.
str_trunc(fruit, 8)
## [1] "apple" "apricot" "avocado" "banana" "bell ..." "bilberry"
## [7] "black..." "black..." "blood..." "blueb..."
# str_trim(): remove leading and trailing whitespace.
str_trim(" hello ")
## [1] "hello"
# Mutate strings ----
# Assignment form of str_sub(): overwrites characters 1 to 3 of every element
# with "123". This modifies `fruit` in place, so every example from this point
# on operates on the altered vector ("123le", "123icot", ...).
str_sub(fruit, 1, 3) <- "123"
# str_replace(): replace only the first "a" in each element. The result is
# printed, not assigned, so `fruit` itself is left unchanged.
str_replace(fruit, "a", "aaa")
## [1] "123le" "123icot" "123caaado" "123aaana"
## [5] "123l pepper" "123berry" "123ckberry" "123ckcurraaant"
## [9] "123od oraaange" "123eberry"
# str_replace_all(): replace every "a" in each element (compare element 4
# with the str_replace() output above).
str_replace_all(fruit, "a", "aaa")
## [1] "123le" "123icot" "123caaado" "123aaanaaa"
## [5] "123l pepper" "123berry" "123ckberry" "123ckcurraaant"
## [9] "123od oraaange" "123eberry"
# Case conversion: lower case, upper case and title case.
str_to_lower("HeLLO")
## [1] "hello"
str_to_upper(fruit)
## [1] "123LE" "123ICOT" "123CADO" "123ANA"
## [5] "123L PEPPER" "123BERRY" "123CKBERRY" "123CKCURRANT"
## [9] "123OD ORANGE" "123EBERRY"
str_to_title(fruit)
## [1] "123Le" "123Icot" "123Cado" "123Ana"
## [5] "123L Pepper" "123Berry" "123Ckberry" "123Ckcurrant"
## [9] "123Od Orange" "123Eberry"
# Case conversion with the snakecase package ----
# Called through `snakecase::` so the package does not need to be attached
# (it must still be installed). to_snake_case() splits the camelCase input
# into lower-case words joined by `sep_out`.
snakecase::to_snake_case("thisText", sep_out = " ")
## [1] "this text"
snakecase::to_snake_case("thisText", sep_out = "_")
## [1] "this_text"
# Join and split ----
# `fruit` in this section is the vector modified earlier in the file by the
# str_sub() assignment, hence the "123" prefixes in the output.
# str_c(): concatenate the arguments into one string; numbers are converted
# to text.
str_c(4, 6)
## [1] "46"
# `sep` is inserted between the joined pieces.
str_c("cat", "dog", sep = "_")
## [1] "cat_dog"
# str_dup(): repeat each element the given number of times.
str_dup(fruit, 2)
## [1] "123le123le" "123icot123icot"
## [3] "123cado123cado" "123ana123ana"
## [5] "123l pepper123l pepper" "123berry123berry"
## [7] "123ckberry123ckberry" "123ckcurrant123ckcurrant"
## [9] "123od orange123od orange" "123eberry123eberry"
# str_split_fixed(): split on the pattern into a character matrix with exactly
# `n` columns; unused columns are filled with "".
str_split_fixed(fruit, pattern = " ", n = 3)
## [,1] [,2] [,3]
## [1,] "123le" "" ""
## [2,] "123icot" "" ""
## [3,] "123cado" "" ""
## [4,] "123ana" "" ""
## [5,] "123l" "pepper" ""
## [6,] "123berry" "" ""
## [7,] "123ckberry" "" ""
## [8,] "123ckcurrant" "" ""
## [9,] "123od" "orange" ""
## [10,] "123eberry" "" ""
# str_split(): split on the pattern into a list with one character vector per
# element, each as long as the number of pieces found.
str_split(fruit, pattern = " ")
## [[1]]
## [1] "123le"
##
## [[2]]
## [1] "123icot"
##
## [[3]]
## [1] "123cado"
##
## [[4]]
## [1] "123ana"
##
## [[5]]
## [1] "123l" "pepper"
##
## [[6]]
## [1] "123berry"
##
## [[7]]
## [1] "123ckberry"
##
## [[8]]
## [1] "123ckcurrant"
##
## [[9]]
## [1] "123od" "orange"
##
## [[10]]
## [1] "123eberry"
# str_c() is vectorised: the single strings are combined with every element
# of `fruit`, giving one result per element.
str_c("Pi is ", fruit, ". 123")
## [1] "Pi is 123le. 123" "Pi is 123icot. 123"
## [3] "Pi is 123cado. 123" "Pi is 123ana. 123"
## [5] "Pi is 123l pepper. 123" "Pi is 123berry. 123"
## [7] "Pi is 123ckberry. 123" "Pi is 123ckcurrant. 123"
## [9] "Pi is 123od orange. 123" "Pi is 123eberry. 123"
# The result is assigned to `x` rather than printed, so no output follows.
# NOTE(review): `x` is not used anywhere in the visible part of the file.
x <- str_c("t", "c", "g")
# Order strings ----
# `fruit` here is the vector modified earlier in the file by the str_sub()
# assignment, hence the "123" prefixes in the output.
# str_order(): the index permutation that puts the elements in ascending
# order, i.e. fruit[str_order(fruit)] is the sorted vector.
str_order(fruit)
## [1] 4 6 3 7 8 10 2 5 1 9
# str_sort(): the sorted values themselves, here in descending order.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
str_sort(fruit, decreasing = TRUE)
## [1] "123od orange" "123le" "123l pepper" "123icot"
## [5] "123eberry" "123ckcurrant" "123ckberry" "123cado"
## [9] "123berry" "123ana"