R Stringr : Part 1

Library

library(stringr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Data

x <- c("why", "video", "cross", "extra", "deal", "authority")
x

## [1] "why"       "video"     "cross"     "extra"     "deal"      "authority"

Length, collapse, substring

str_length(x)

## [1] 3 5 5 5 4 9

str_c(x, collapse = ", ")

## [1] "why, video, cross, extra, deal, authority"

str_sub(x, 1, 2)

## [1] "wh" "vi" "cr" "ex" "de" "au"

[aeiou] pattern

str_detect(x, "[aeiou]")

## [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE

str_count(x, "[aeiou]")

## [1] 0 3 1 2 2 4

str_subset(x, "[aeiou]")

## [1] "video"     "cross"     "extra"     "deal"      "authority"

str_extract(x, "[aeiou]")

## [1] NA  "i" "o" "e" "e" "a"

str_replace(x, "[aeiou]", "?")

## [1] "why"       "v?deo"     "cr?ss"     "?xtra"     "d?al"      "?uthority"

Whitespace

‘str_pad’

x <- c("abc", "defghi")
str_pad(x, 10) # default pads on left

## [1] "       abc" "    defghi"

str_pad(x, 10, "both")

## [1] "   abc    " "  defghi  "

str_pad(x, 4)

## [1] " abc"   "defghi"

‘str_trunc’ & ‘str_pad’

# One string shorter than the target width and one longer than it.
x <- c("Short", "This is a long string")

# Normalise every string to a width of 10: first shorten anything longer
# than 10 characters, then pad anything shorter with spaces on the right.
x %>% 
  str_trunc(10) %>% 
  str_pad(10, "right")

## [1] "Short     " "This is..."

‘str_trim’

x <- c("  a   ", "b   ",  "   c")
str_trim(x)

## [1] "a" "b" "c"

str_trim(x, "left")

## [1] "a   " "b   " "c"

Wrapping

# Join the four fragments into one long string (no separator is given,
# so the pieces are concatenated directly).
jabberwocky <- str_c(
  "`Twas brillig, and the slithy toves ",
  "did gyre and gimble in the wabe: ",
  "All mimsy were the borogoves, ",
  "and the mome raths outgrabe. "
)
# Re-flow the text to a target line width of 40 characters; cat() prints
# the inserted line breaks as real new lines.
cat(str_wrap(jabberwocky, width = 40))

## `Twas brillig, and the slithy toves did
## gyre and gimble in the wabe: All mimsy
## were the borogoves, and the mome raths
## outgrabe.

#> `Twas brillig, and the slithy toves did
#> gyre and gimble in the wabe: All mimsy
#> were the borogoves, and the mome raths
#> outgrabe.

Locale sensitive

x <- "I like horses."
str_to_upper(x)

## [1] "I LIKE HORSES."

str_to_title(x)

## [1] "I Like Horses."

str_to_lower(x)

## [1] "i like horses."

String ordering and sorting

x <- c("y", "i", "k")
str_order(x)

## [1] 2 3 1

str_sort(x)

## [1] "i" "k" "y"

Pattern matching

# Test inputs: no phone number, a space-separated number, a dash-separated
# number, and a string containing two numbers.
strings <- c(
  "apple", 
  "219 733 8965", 
  "329-293-8753", 
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Phone-number pattern with three capture groups:
#   ([2-9][0-9]{2})  three digits, the first of which is 2-9
#   [- .]            one separator: dash, space, or dot
#   ([0-9]{3})       three digits
#   [- .]            one separator: dash, space, or dot
#   ([0-9]{4})       four digits
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
# Does each string contain at least one match?
str_detect(strings, phone)

## [1] FALSE  TRUE  TRUE  TRUE

str_subset(strings, phone)

## [1] "219 733 8965"                          
## [2] "329-293-8753"                          
## [3] "Work: 579-499-7527; Home: 543.355.3679"

str_count(strings, phone)

## [1] 0 1 1 2

str_extract(strings, phone)

## [1] NA             "219 733 8965" "329-293-8753" "579-499-7527"

str_extract_all(strings, phone, simplify = TRUE)

##      [,1]           [,2]          
## [1,] ""             ""            
## [2,] "219 733 8965" ""            
## [3,] "329-293-8753" ""            
## [4,] "579-499-7527" "543.355.3679"

Replace values

str_replace(strings, phone, "XXX-XXX-XXXX")

## [1] "apple"                                 
## [2] "XXX-XXX-XXXX"                          
## [3] "XXX-XXX-XXXX"                          
## [4] "Work: XXX-XXX-XXXX; Home: 543.355.3679"

str_replace_all(strings, phone, "XXX-XXX-XXXX")

## [1] "apple"                                 
## [2] "XXX-XXX-XXXX"                          
## [3] "XXX-XXX-XXXX"                          
## [4] "Work: XXX-XXX-XXXX; Home: XXX-XXX-XXXX"

Split

str_split("a-b-c", "-")

## [[1]]
## [1] "a" "b" "c"

str_split_fixed("a-b-c", "-", n = 2)

##      [,1] [,2] 
## [1,] "a"  "b-c"

x <- "This is a sentence."
str_split(x, "")

## [[1]]
##  [1] "T" "h" "i" "s" " " "i" "s" " " "a" " " "s" "e" "n" "t" "e" "n" "c" "e" "."

str_count(x, "")

## [1] 19

Combining strings

str_c("x", "y")

## [1] "xy"

str_c("x", "y", "z")

## [1] "xyz"

str_c("x", "y", sep = ", ")

## [1] "x, y"

x <- c("abc", NA)
str_c("|-", x, "-|")

## [1] "|-abc-|" NA

str_c("|-", str_replace_na(x), "-|")

## [1] "|-abc-|" "|-NA-|"

str_c("prefix-", c("a", "b", "c"), "-suffix")

## [1] "prefix-a-suffix" "prefix-b-suffix" "prefix-c-suffix"

name <- "Hadley"
time_of_day <- "morning"
birthday <- FALSE

# Build a greeting whose birthday clause is optional. An `if` with no
# `else` evaluates to NULL when its condition is FALSE, and str_c() drops
# NULL arguments, so with birthday = FALSE the clause simply disappears.
str_c(
  "Good ", time_of_day, " ", name,
  if (birthday) " and HAPPY BIRTHDAY",
  "."
)

## [1] "Good morning Hadley."

str_c(c("x", "y", "z"), collapse = ", ")

## [1] "x, y, z"

Basic matches

# Sample words used for the basic pattern-matching examples.
x <- c("apple", "banana", "pear")
# A plain pattern matches that exact sequence of characters.
str_view(x, "an")

str_view(x, ".a.") # . matches any character (except a newline), so this is "a" with one character on each side.

str_view(x, "^a")  # ^ anchors the match to the start of the string.

str_view(x, "a$")  # $ anchors the match to the end of the string.

Expression

\d: matches any digit.
\s: matches any whitespace (e.g. space, tab, newline).

Reference

https://r4ds.had.co.nz/strings.html#anchors

R Stringr : Part 1

Naimul Islam

2022-10-11

Library

Data

Length, collapse, substring

[aeiou] pattern

Whitespace

‘str_pad’

‘str_trunc’ & ‘str_pad’

‘str_trim’

Wrapping

Locale sensitive

String ordering and sorting

Pattern matching

Replace values

Split

Combining strings

Basic matches

Expression

Reference