Library

library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Data

# Sample words reused by the length/substring and vowel-pattern examples below.
x <- c(
  "why", "video", "cross",
  "extra", "deal", "authority"
)
print(x)
## [1] "why"       "video"     "cross"     "extra"     "deal"      "authority"

Length, collapse, substring

# Number of characters in each element of x.
x %>% str_length()
## [1] 3 5 5 5 4 9
# Join every element into one string, separated by ", ".
x %>% str_c(collapse = ", ")
## [1] "why, video, cross, extra, deal, authority"
# Keep only the first two characters of each element.
str_sub(x, start = 1, end = 2)
## [1] "wh" "vi" "cr" "ex" "de" "au"

[aeiou] pattern

# Does each element contain at least one lowercase vowel?
str_detect(x, pattern = "[aeiou]")
## [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
# How many vowels does each element contain?
str_count(x, pattern = "[aeiou]")
## [1] 0 3 1 2 2 4
# Keep only the elements that contain a vowel.
str_subset(x, pattern = "[aeiou]")
## [1] "video"     "cross"     "extra"     "deal"      "authority"
# First vowel found in each element; NA when there is none.
str_extract(x, pattern = "[aeiou]")
## [1] NA  "i" "o" "e" "e" "a"
# Replace only the first vowel of each element with "?".
str_replace(x, pattern = "[aeiou]", replacement = "?")
## [1] "why"       "v?deo"     "cr?ss"     "?xtra"     "d?al"      "?uthority"

Whitespace

‘str_pad’

x <- c("abc", "defghi")
# With no `side` given, the padding is added on the left.
str_pad(x, width = 10)
## [1] "       abc" "    defghi"
# Pad on both sides so the text sits in the middle of the field.
str_pad(x, width = 10, side = "both")
## [1] "   abc    " "  defghi  "
# Elements already longer than `width` come back unchanged.
str_pad(x, width = 4)
## [1] " abc"   "defghi"

‘str_trunc’ & ‘str_pad’

x <- c(
  "Short",
  "This is a long string"
)

# Truncate to at most 10 characters, then right-pad so every result is
# exactly 10 characters wide.
x %>%
  str_trunc(width = 10) %>%
  str_pad(width = 10, side = "right")
## [1] "Short     " "This is..."

‘str_trim’

x <- c(
  "  a   ",
  "b   ",
  "   c"
)
# With no `side` given, whitespace is stripped from both ends.
x %>% str_trim()
## [1] "a" "b" "c"
# Strip leading whitespace only; trailing spaces are kept.
str_trim(x, side = "left")
## [1] "a   " "b   " "c"

Wrapping

# Build one long string from its pieces (no separator between them).
jabberwocky <- str_c(
  c(
    "`Twas brillig, and the slithy toves ",
    "did gyre and gimble in the wabe: ",
    "All mimsy were the borogoves, ",
    "and the mome raths outgrabe. "
  ),
  collapse = ""
)
# Re-flow the text to 40-character lines and print it.
jabberwocky %>%
  str_wrap(width = 40) %>%
  cat()
## `Twas brillig, and the slithy toves did
## gyre and gimble in the wabe: All mimsy
## were the borogoves, and the mome raths
## outgrabe.

Locale sensitive

x <- "I like horses."
# All letters upper-cased.
x %>% str_to_upper()
## [1] "I LIKE HORSES."
# First letter of every word capitalised.
x %>% str_to_title()
## [1] "I Like Horses."
# All letters lower-cased.
x %>% str_to_lower()
## [1] "i like horses."

String ordering and sorting

x <- c(
  "y",
  "i",
  "k"
)
# Indices that would put x into sorted order.
x %>% str_order()
## [1] 2 3 1
# The sorted values themselves.
x %>% str_sort()
## [1] "i" "k" "y"

Pattern matching

# Test inputs: no number, space-separated, dash-separated, and two numbers
# with mixed separators in one string.
strings <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Three digit groups (3-3-4), each pair separated by a dash, space or dot;
# the first digit must be 2-9.
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"

# Which strings contain a phone number?
str_detect(strings, pattern = phone)
## [1] FALSE  TRUE  TRUE  TRUE
# Keep only the strings that contain one.
str_subset(strings, pattern = phone)
## [1] "219 733 8965"                          
## [2] "329-293-8753"                          
## [3] "Work: 579-499-7527; Home: 543.355.3679"
# Number of phone numbers per string.
str_count(strings, pattern = phone)
## [1] 0 1 1 2
# First phone number in each string (NA when there is none).
str_extract(strings, pattern = phone)
## [1] NA             "219 733 8965" "329-293-8753" "579-499-7527"
# Every phone number, as a character matrix padded with "".
str_extract_all(strings, pattern = phone, simplify = TRUE)
##      [,1]           [,2]          
## [1,] ""             ""            
## [2,] "219 733 8965" ""            
## [3,] "329-293-8753" ""            
## [4,] "579-499-7527" "543.355.3679"

Replace values

# Mask only the first phone number found in each string.
str_replace(
  strings,
  pattern = phone,
  replacement = "XXX-XXX-XXXX"
)
## [1] "apple"                                 
## [2] "XXX-XXX-XXXX"                          
## [3] "XXX-XXX-XXXX"                          
## [4] "Work: XXX-XXX-XXXX; Home: 543.355.3679"
# Mask every phone number in each string.
str_replace_all(
  strings,
  pattern = phone,
  replacement = "XXX-XXX-XXXX"
)
## [1] "apple"                                 
## [2] "XXX-XXX-XXXX"                          
## [3] "XXX-XXX-XXXX"                          
## [4] "Work: XXX-XXX-XXXX; Home: XXX-XXX-XXXX"

Split

# Split on every "-"; the result is a list with one element per input string.
str_split("a-b-c", pattern = "-")
## [[1]]
## [1] "a" "b" "c"
# Split into exactly two pieces; the result is a character matrix.
str_split_fixed("a-b-c", pattern = "-", n = 2)
##      [,1] [,2] 
## [1,] "a"  "b-c"
x <- "This is a sentence."
# An empty pattern splits the string into its individual characters.
str_split(x, pattern = "")
## [[1]]
##  [1] "T" "h" "i" "s" " " "i" "s" " " "a" " " "s" "e" "n" "t" "e" "n" "c" "e" "."
# An empty pattern counts the characters.
str_count(x, pattern = "")
## [1] 19

Combining strings

# With no `sep` given, the pieces are concatenated directly.
str_c("x", "y")
## [1] "xy"
str_c("x", "y", "z")
## [1] "xyz"
# `sep` is inserted between the pieces.
str_c("x", "y", sep = ", ")
## [1] "x, y"
x <- c("abc", NA)
# A missing value makes the whole result for that element NA ...
str_c("|-", x, "-|")
## [1] "|-abc-|" NA
# ... unless it is first turned into the literal string "NA".
str_c("|-", str_replace_na(x), "-|")
## [1] "|-abc-|" "|-NA-|"
# Length-one pieces are recycled against the longer vector.
str_c("prefix-", c("a", "b", "c"), "-suffix")
## [1] "prefix-a-suffix" "prefix-b-suffix" "prefix-c-suffix"
name <- "Hadley"
time_of_day <- "morning"
birthday <- FALSE

# When `birthday` is FALSE the `if` yields NULL, and that piece is dropped
# from the result.
str_c(
  "Good ",
  time_of_day,
  " ",
  name,
  if (birthday) " and HAPPY BIRTHDAY",
  "."
)
## [1] "Good morning Hadley."
# `collapse` joins the elements of one vector into a single string.
str_c(
  c("x", "y", "z"),
  collapse = ", "
)
## [1] "x, y, z"

Basic matches

x <- c(
  "apple",
  "banana",
  "pear"
)
# Literal text: highlight the exact sequence "an".
str_view(x, pattern = "an")
# `.` matches any character, so this is an "a" with one character either side.
str_view(x, pattern = ".a.")
# `^` anchors the match to the start of the string.
str_view(x, pattern = "^a")
# `$` anchors the match to the end of the string.
str_view(x, pattern = "a$")

Regular expression character classes

  1. `\d`: matches any digit.
  2. `\s`: matches any whitespace (e.g. space, tab, newline).

Reference

  1. https://r4ds.had.co.nz/strings.html#anchors