Library
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Length, collapse, substring
# Example words used by the demos in this section and the next. The
# transcript called these functions without ever defining `x`; the values
# are the ones shown in the printed output.
x <- c("why", "video", "cross", "extra", "deal", "authority")

# Number of characters in each element.
str_length(x)
## [1] 3 5 5 5 4 9
# Join all elements into one string, separated by ", ".
str_c(x, collapse = ", ")
## [1] "why, video, cross, extra, deal, authority"
# Characters 1 through 2 of each element.
str_sub(x, 1, 2)
## [1] "wh" "vi" "cr" "ex" "de" "au"
[aeiou] pattern
# Character class that matches any single lowercase vowel.
vowel <- "[aeiou]"

# Does each element contain at least one vowel?
str_detect(string = x, pattern = vowel)
## [1] FALSE TRUE TRUE TRUE TRUE TRUE
# How many vowels does each element contain?
str_count(string = x, pattern = vowel)
## [1] 0 3 1 2 2 4
# Keep only the elements that contain a vowel.
str_subset(string = x, pattern = vowel)
## [1] "video" "cross" "extra" "deal" "authority"
# First vowel in each element (NA when there is none).
str_extract(string = x, pattern = vowel)
## [1] NA "i" "o" "e" "e" "a"
# Replace the first vowel in each element with "?".
str_replace(string = x, pattern = vowel, replacement = "?")
## [1] "why" "v?deo" "cr?ss" "?xtra" "d?al" "?uthority"
Whitespace
‘str_pad’
# Pad strings with spaces up to a minimum width.
x <- c("abc", "defghi")
str_pad(x, 10) # default pads on left
## [1] "       abc" "    defghi"
# Pad on both sides; when the padding is odd the extra space goes on the right.
str_pad(x, 10, "both")
## [1] "   abc    " "  defghi  "
# Strings already longer than the requested width are returned unchanged.
str_pad(x, 4)
## [1] " abc" "defghi"
‘str_trunc’ & ‘str_pad’
x <- c("Short", "This is a long string")
# Truncate to at most 10 characters (longer strings end in "..."), then pad
# on the right so every element is exactly 10 characters wide.
x %>%
str_trunc(10) %>%
str_pad(10, "right")
## [1] "Short     " "This is..."
‘str_trim’
# Strings with leading and/or trailing whitespace.
x <- c(" a ", "b ", " c")
# Strip whitespace from both ends (the default side).
str_trim(string = x, side = "both")
## [1] "a" "b" "c"
# Strip leading whitespace only; trailing spaces are kept.
str_trim(string = x, side = "left")
## [1] "a " "b " "c"
Wrapping
# Build one long string from the four verse fragments.
jabberwocky <- str_c(
  c(
    "`Twas brillig, and the slithy toves ",
    "did gyre and gimble in the wabe: ",
    "All mimsy were the borogoves, ",
    "and the mome raths outgrabe. "
  ),
  collapse = ""
)
# Re-flow the text so that no line is wider than 40 characters, then print it.
wrapped <- str_wrap(jabberwocky, width = 40)
cat(wrapped)
## `Twas brillig, and the slithy toves did
## gyre and gimble in the wabe: All mimsy
## were the borogoves, and the mome raths
## outgrabe.
#> `Twas brillig, and the slithy toves did
#> gyre and gimble in the wabe: All mimsy
#> were the borogoves, and the mome raths
#> outgrabe.
Locale sensitive
# Case conversion follows locale rules; "en" is the default locale,
# spelled out here to make the dependency visible.
x <- "I like horses."
str_to_upper(string = x, locale = "en")
## [1] "I LIKE HORSES."
str_to_title(string = x, locale = "en")
## [1] "I Like Horses."
str_to_lower(string = x, locale = "en")
## [1] "i like horses."
String ordering and sorting
x <- c("y", "i", "k")
# Indices that would put `x` in ascending order.
str_order(x, decreasing = FALSE)
## [1] 2 3 1
# The values themselves in ascending order.
str_sort(x, decreasing = FALSE)
## [1] "i" "k" "y"
Pattern matching
# Sample inputs: one with no phone number, two with exactly one, and one
# with two.
strings <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Three digit groups (3-3-4) separated by a dash, space, or dot; the first
# digit must be 2-9. Each group is captured.
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"

# Which strings contain a phone number?
str_detect(string = strings, pattern = phone)
## [1] FALSE TRUE TRUE TRUE
# Keep only the strings that contain one.
str_subset(string = strings, pattern = phone)
## [1] "219 733 8965"
## [2] "329-293-8753"
## [3] "Work: 579-499-7527; Home: 543.355.3679"
# Number of phone numbers in each string.
str_count(string = strings, pattern = phone)
## [1] 0 1 1 2
# First phone number in each string (NA when there is none).
str_extract(string = strings, pattern = phone)
## [1] NA "219 733 8965" "329-293-8753" "579-499-7527"
# Every phone number, as a character matrix padded with "".
str_extract_all(string = strings, pattern = phone, simplify = TRUE)
## [,1] [,2]
## [1,] "" ""
## [2,] "219 733 8965" ""
## [3,] "329-293-8753" ""
## [4,] "579-499-7527" "543.355.3679"
Replacing matches
# Placeholder written over each matched phone number.
mask <- "XXX-XXX-XXXX"

# Replace only the first match in each string.
str_replace(string = strings, pattern = phone, replacement = mask)
## [1] "apple"
## [2] "XXX-XXX-XXXX"
## [3] "XXX-XXX-XXXX"
## [4] "Work: XXX-XXX-XXXX; Home: 543.355.3679"
# Replace every match in each string.
str_replace_all(string = strings, pattern = phone, replacement = mask)
## [1] "apple"
## [2] "XXX-XXX-XXXX"
## [3] "XXX-XXX-XXXX"
## [4] "Work: XXX-XXX-XXXX; Home: XXX-XXX-XXXX"
Split
# Split on a delimiter; the result is a list with one character vector per
# input string.
str_split(string = "a-b-c", pattern = "-")
## [[1]]
## [1] "a" "b" "c"
# Split into a fixed number of pieces, returned as a character matrix; the
# last piece keeps whatever is left over.
str_split_fixed(string = "a-b-c", pattern = "-", n = 2)
## [,1] [,2]
## [1,] "a" "b-c"
x <- "This is a sentence."
# An empty pattern splits the string into individual characters.
str_split(string = x, pattern = "")
## [[1]]
## [1] "T" "h" "i" "s" " " "i" "s" " " "a" " " "s" "e" "n" "t" "e" "n" "c" "e" "."
# An empty pattern counts individual characters.
str_count(string = x, pattern = "")
## [1] 19
Combining strings
# Concatenate with the default (empty) separator, written out explicitly.
str_c("x", "y", sep = "")
## [1] "xy"
str_c("x", "y", "z", sep = "")
## [1] "xyz"
# Custom separator placed between the pieces.
str_c("x", "y", sep = ", ")
## [1] "x, y"
x <- c("abc", NA)
# A missing value makes the combined result missing.
str_c("|-", x, "-|")
## [1] "|-abc-|" NA
# Convert NA to the literal string "NA" first so it prints.
str_c("|-", str_replace_na(x, replacement = "NA"), "-|")
## [1] "|-abc-|" "|-NA-|"
# Length-one arguments are recycled across the longer vector.
str_c("prefix-", c("a", "b", "c"), "-suffix")
## [1] "prefix-a-suffix" "prefix-b-suffix" "prefix-c-suffix"
name <- "Hadley"
time_of_day <- "morning"
birthday <- FALSE
# When `birthday` is FALSE the conditional yields NULL, which str_c() drops,
# so the optional greeting disappears.
str_c(
  "Good ", time_of_day, " ", name,
  if (birthday) " and HAPPY BIRTHDAY" else NULL,
  "."
)
## [1] "Good morning Hadley."
# Collapse a character vector into a single string.
str_c(c("x", "y", "z"), collapse = ", ")
## [1] "x, y, z"
Basic matches
x <- c("apple", "banana", "pear")
# Literal substring match.
str_view(string = x, pattern = "an")
# `.` matches any character, so this finds an "a" with one character on
# each side.
str_view(string = x, pattern = ".a.")
# `^` anchors the match to the start of the string.
str_view(string = x, pattern = "^a")
# `$` anchors the match to the end of the string.
str_view(string = x, pattern = "a$")
Regular expressions
- `\d`: matches any digit.
- `\s`: matches any whitespace (e.g. space, tab, newline).