## Warning: package 'htmlwidgets' was built under R version 4.2.1
# paste() joins its arguments with a single space between them.
paste("foo", "bar", sep = " ")
## [1] "foo bar"
# paste0() joins its arguments with no separator.
paste0("foo", "bar")
## [1] "foobar"
# str_c() also joins without a separator, so it behaves like paste0().
str_c("foo", "bar", sep = "")
## [1] "foobar"
# str_c() propagates missing values: an NA input makes the result NA.
str_c("foo", NA, sep = "")
## [1] NA
# paste() and paste0() instead turn NA into the literal text "NA".
paste("foo", NA, sep = " ")
## [1] "foo NA"
paste0("foo", NA)
## [1] "fooNA"
# Extract the middle character of each string. When a string has an even
# number of characters n, the character at position n / 2 is selected.
x <- c("a", "abc", "abcd", "abcde", "abcdef")
L <- x %>% str_length()
# Halve the length and round up to get the middle position.
m <- (L / 2) %>% ceiling()
x %>% str_sub(start = m, end = m)
## [1] "a" "b" "b" "c" "c"
# Match the literal sequence "'\ : the backslash is escaped once for the
# regular expression and once more for the R string, giving four backslashes.
str_view(string = "\"'\\", pattern = "\"'\\\\", match = TRUE)
# Match a literal dot followed by any single character, three times in a row.
str_view(
  string = c(".a.b.c", ".a.b", "....."),
  pattern = "\\..\\..\\..",
  match = TRUE
)
# Exercise: find all words that (1) start with a vowel; (2) contain only consonants (hint: think about matching "not"-vowels); (3) end with "ed" but not with "eed"; (4) end with "ing" or "ise".
# Words whose first letter is a vowel.
stringr::words %>% str_subset(pattern = "^[aeiou]")
## [1] "a" "able" "about" "absolute" "accept"
## [6] "account" "achieve" "across" "act" "active"
## [11] "actual" "add" "address" "admit" "advertise"
## [16] "affect" "afford" "after" "afternoon" "again"
## [21] "against" "age" "agent" "ago" "agree"
## [26] "air" "all" "allow" "almost" "along"
## [31] "already" "alright" "also" "although" "always"
## [36] "america" "amount" "and" "another" "answer"
## [41] "any" "apart" "apparent" "appear" "apply"
## [46] "appoint" "approach" "appropriate" "area" "argue"
## [51] "arm" "around" "arrange" "art" "as"
## [56] "ask" "associate" "assume" "at" "attend"
## [61] "authority" "available" "aware" "away" "awful"
## [66] "each" "early" "east" "easy" "eat"
## [71] "economy" "educate" "effect" "egg" "eight"
## [76] "either" "elect" "electric" "eleven" "else"
## [81] "employ" "encourage" "end" "engine" "english"
## [86] "enjoy" "enough" "enter" "environment" "equal"
## [91] "especial" "europe" "even" "evening" "ever"
## [96] "every" "evidence" "exact" "example" "except"
## [101] "excuse" "exercise" "exist" "expect" "expense"
## [106] "experience" "explain" "express" "extra" "eye"
## [111] "idea" "identify" "if" "imagine" "important"
## [116] "improve" "in" "include" "income" "increase"
## [121] "indeed" "individual" "industry" "inform" "inside"
## [126] "instead" "insure" "interest" "into" "introduce"
## [131] "invest" "involve" "issue" "it" "item"
## [136] "obvious" "occasion" "odd" "of" "off"
## [141] "offer" "office" "often" "okay" "old"
## [146] "on" "once" "one" "only" "open"
## [151] "operate" "opportunity" "oppose" "or" "order"
## [156] "organize" "original" "other" "otherwise" "ought"
## [161] "out" "over" "own" "under" "understand"
## [166] "union" "unit" "unite" "university" "unless"
## [171] "until" "up" "upon" "use" "usual"
# Words made up of consonants only: negate = TRUE keeps the words that do
# NOT contain any vowel.
stringr::words %>% str_subset(pattern = "[aeiou]", negate = TRUE)
## [1] "by" "dry" "fly" "mrs" "try" "why"
# Words that end with "-ed" but not with "-eed".
# "[^e]ed$" on its own demands a character before "ed", so the bare word "ed"
# could never match. The "(^|[^e])" alternative also accepts "ed" at the very
# start of the word while still rejecting "-eed".
str_subset(stringr::words, "(^|[^e])ed$")
## [1] "bed" "hundred" "red"
# Words ending in "ing" or "ise" (both share the leading "i").
stringr::words %>% str_subset(pattern = "i(ng|se)$")
## [1] "advertise" "bring" "during" "evening" "exercise" "king"
## [7] "meaning" "morning" "otherwise" "practise" "raise" "realise"
## [13] "ring" "rise" "sing" "surprise" "thing"
# Words that begin with three consecutive non-vowel characters.
words %>% str_view(pattern = "^[^aeiou]{3}", match = TRUE)
# Words that have three or more vowels in a row.
words %>% str_view(pattern = "[aeiou]{3,}", match = TRUE)
# Words that have two or more vowel-consonant pairs in a row.
words %>% str_view(pattern = "([aeiou][^aeiou]){2,}", match = TRUE)
# Find all words that start or end with x.
# Option 1: a single regular expression using alternation.
str_subset(words, "^x|x$")
## [1] "box" "sex" "six" "tax"
# Option 2: combine two simpler str_detect() calls with a logical OR.
start_with_x <- words %>% str_detect("^x")
end_with_x <- words %>% str_detect("x$")
words[start_with_x | end_with_x]
## [1] "box" "sex" "six" "tax"
# Find all words that start with a vowel and end with a consonant.
# Option 1: a single regular expression anchored at both ends.
words[str_detect(words, "^[aeiou].*[^aeiou]$")] %>% head()
## [1] "about" "accept" "account" "across" "act" "actual"
# Option 2: combine two simpler str_detect() calls with a logical AND.
start_with_vowel <- words %>% str_detect("^[aeiou]")
end_with_consonant <- words %>% str_detect("[^aeiou]$")
head(words[start_with_vowel & end_with_consonant])
## [1] "about" "accept" "account" "across" "act" "actual"
# Are there any words that contain at least one of each different vowel?
# Build a single regular expression that lists the five vowels in every
# possible order (5! = 120 alternatives joined by "|"); "a.*e.*i.*o.*u" is one
# of them. Base R is used because purrr::cross() and purrr::rerun() are
# deprecated as of purrr 1.0.0. local() keeps the helper variables out of the
# global environment, so only `pattern` is created.
pattern <- local({
  vowels <- c("a", "e", "i", "o", "u")
  # Every ordered selection of five vowels (5^5 rows), one selection per row.
  candidates <- as.matrix(
    expand.grid(rep(list(vowels), length(vowels)), stringsAsFactors = FALSE)
  )
  # Keep only the rows in which no vowel is repeated, i.e. the permutations.
  is_permutation <- apply(candidates, 1, anyDuplicated) == 0L
  orderings <- apply(
    candidates[is_permutation, , drop = FALSE], 1, paste,
    collapse = ".*"
  )
  paste(orderings, collapse = "|")
})
# Check 1: use the combined permutation pattern built above.
words %>% str_subset(pattern)
## character(0)
# Check 2: test for each vowel separately and AND the results together.
words[
  Reduce(
    `&`,
    lapply(
      c("a", "e", "i", "o", "u"),
      function(vowel) str_detect(words, vowel)
    )
  )
]
## character(0)
# The first word from each sentence.
# Letters only: the match stops at an apostrophe, so "It's" comes back as "It".
# (The character class previously read "[A-ZAa-z]" with a stray duplicate "A";
# the set of matched characters is unchanged.)
str_extract(sentences, "[A-Za-z]+") %>% head()
## [1] "The" "Glue" "It" "These" "Rice" "The"
# Allow apostrophes after the first letter so contractions stay whole.
str_extract(sentences, "[A-Za-z][A-Za-z']*") %>% head()
## [1] "The" "Glue" "It's" "These" "Rice" "The"
# All words ending in "ing": whole words (\b ... \b) made of letters.
pattern <- "\\b[A-Za-z]+ing\\b"
sentences_with_ing <- sentences %>% str_detect(pattern)
# Extract every match from the matching sentences, flatten, and de-duplicate.
sentences[sentences_with_ing] %>%
  str_extract_all(pattern) %>%
  unlist() %>%
  unique() %>%
  head()
## [1] "spring" "evening" "morning" "winding" "living" "king"
# All plurals, approximated as whole words of at least three letters
# followed by a final "s".
sentences %>%
  str_extract_all("\\b[A-Za-z]{3,}s\\b") %>%
  unlist() %>%
  unique() %>%
  head()
## [1] "planks" "days" "bowls" "lemons" "makes" "hogs"
# A number word ("one" to "ten") followed by spaces and the next word.
numword <- "\\b(one|two|three|four|five|six|seven|eight|nine|ten) +(\\w+)"
# Keep the sentences that contain such a pair, then pull out the first pair.
str_subset(sentences, numword) %>%
  str_extract(pattern = numword)
## [1] "seven books" "two met" "two factors" "three lists"
## [5] "seven is" "two when" "ten inches" "one war"
## [9] "one button" "six minutes" "ten years" "two shares"
## [13] "two distinct" "five cents" "two pins" "five robins"
## [17] "four kinds" "three story" "three inches" "six comes"
## [21] "three batches" "two leaves"
# A contraction: letters, an apostrophe, then more letters.
contraction <- "([A-Za-z]+)'([A-Za-z]+)"
# Take the first contraction of each matching sentence and split it into the
# parts before and after the apostrophe.
str_subset(sentences, contraction) %>%
  str_extract(pattern = contraction) %>%
  str_split(pattern = "'")
## [[1]]
## [1] "It" "s"
##
## [[2]]
## [1] "man" "s"
##
## [[3]]
## [1] "don" "t"
##
## [[4]]
## [1] "store" "s"
##
## [[5]]
## [1] "workmen" "s"
##
## [[6]]
## [1] "Let" "s"
##
## [[7]]
## [1] "sun" "s"
##
## [[8]]
## [1] "child" "s"
##
## [[9]]
## [1] "king" "s"
##
## [[10]]
## [1] "It" "s"
##
## [[11]]
## [1] "don" "t"
##
## [[12]]
## [1] "queen" "s"
##
## [[13]]
## [1] "don" "t"
##
## [[14]]
## [1] "pirate" "s"
##
## [[15]]
## [1] "neighbor" "s"
# Replace every forward slash with a backslash. The replacement is written
# with four backslashes because it is escaped once for the R string and once
# more for the replacement syntax.
str_replace_all(
  string = "past/present/future",
  pattern = "/",
  replacement = "\\\\"
)
## [1] "past\\present\\future"
# Lookup table mapping each upper-case letter (names) to its lower-case
# counterpart (values). Built from the base R constants LETTERS and letters
# rather than typing out all 26 pairs by hand.
replacements <- setNames(letters, LETTERS)
# Apply the upper-to-lower lookup table to every word, then preview the result.
lower_words <- words %>% str_replace_all(pattern = replacements)
lower_words %>% head()
## [1] "a" "able" "about" "absolute" "accept" "account"
# Split a comma-separated list into its items; the optional "and " after the
# last comma is consumed by the separator pattern.
x <- "apples, pears, and bananas"
x %>%
  str_split(pattern = ", +(and +)?") %>%
  unlist()
## [1] "apples" "pears" "bananas"
### 14.5
# As a regular expression, a literal backslash is written with four
# backslashes in R source.
str_subset(string = c("a\\b", "ab"), pattern = "\\\\")
## [1] "a\\b"
# With fixed() the pattern is compared literally, so two backslashes in R
# source (one actual backslash) are enough.
c("a\\b", "ab") %>% str_subset(pattern = fixed("\\"))
## [1] "a\\b"