library(stringr)
# paste() joins its arguments with a single space by default.
paste("car", "dealer", sep = " ")
## [1] "car dealer"
# paste0() joins its arguments with no separator.
paste0("car", "dealer")
## [1] "cardealer"
# str_c() also joins with no separator by default, so here it gives the
# same result as paste0().
str_c("car", "dealer", sep = "")
## [1] "cardealer"
# Missing values are where the functions differ.
paste("car", NA, sep = " ")
## [1] "car NA"
paste0("car", NA)
## [1] "carNA"
str_c("car", NA, sep = "")
## [1] NA
# paste() and paste0() turn NA into the string "NA" and concatenate it,
# whereas str_c() returns NA: a missing input gives a missing result, as
# with numeric functions.
# Extract the middle character of each string.
x <- c("car", "tires", "engine", "brakes calipers", "suspensions")
# Number of characters in each string.
Len <- str_length(x)
# Halve the length and round up to get the middle position. When the length
# is even there are two middle characters and the choice is arbitrary;
# ceiling() picks the left one, so every non-empty string yields exactly one
# character.
mid <- ceiling(Len / 2)
str_sub(x, start = mid, end = mid)
## [1] "a" "r" "g" "c" "n"
# str_wrap() breaks a long string into lines of at most `width` characters by
# inserting "\n" between words. This is useful when a long string has to fit
# within a certain width; the result reads as a formatted paragraph once it
# is written out.
str_wrap(
  "In this R programming tutorial you’ll learn how to apply the str_wrap function to wrap a character string into nicely formatted paragraphs.",
  width = 20
)
## [1] "In this R\nprogramming tutorial\nyou’ll learn\nhow to apply the\nstr_wrap function\nto wrap a character\nstring into\nnicely formatted\nparagraphs."
# A literal backslash is written "\\" inside an R string.
x <- "a\\b"
writeLines(x)
## a\b
# The regex for one literal backslash is \\, and each of those backslashes
# must be escaped again in the R string, which gives "\\\\".
str_view(x, pattern = "\\\\")
## [1] | a<\>b
# Match the literal sequence double quote, single quote, backslash.
x <- " \"'\\ car"
writeLines(x)
## "'\ car
str_view(x, pattern = "\"'\\\\")
## [1] | <"'\> car
# Match a literal dot followed by any character, three times in a row
# (for example ".c.a.r"). match = TRUE shows only the strings that match.
str_view(
  c(".c.a.r", ".c.a.r.s", ".c.a."),
  pattern = "\\..\\..\\..",
  match = TRUE
)
## [1] | <.c.a.r>
## [2] | <.c.a.r>.s
# Keep the words whose first letter is a vowel (y is counted as a vowel).
str_subset(words, pattern = "^[aeiouy]")
## [1] "a" "able" "about" "absolute" "accept"
## [6] "account" "achieve" "across" "act" "active"
## [11] "actual" "add" "address" "admit" "advertise"
## [16] "affect" "afford" "after" "afternoon" "again"
## [21] "against" "age" "agent" "ago" "agree"
## [26] "air" "all" "allow" "almost" "along"
## [31] "already" "alright" "also" "although" "always"
## [36] "america" "amount" "and" "another" "answer"
## [41] "any" "apart" "apparent" "appear" "apply"
## [46] "appoint" "approach" "appropriate" "area" "argue"
## [51] "arm" "around" "arrange" "art" "as"
## [56] "ask" "associate" "assume" "at" "attend"
## [61] "authority" "available" "aware" "away" "awful"
## [66] "each" "early" "east" "easy" "eat"
## [71] "economy" "educate" "effect" "egg" "eight"
## [76] "either" "elect" "electric" "eleven" "else"
## [81] "employ" "encourage" "end" "engine" "english"
## [86] "enjoy" "enough" "enter" "environment" "equal"
## [91] "especial" "europe" "even" "evening" "ever"
## [96] "every" "evidence" "exact" "example" "except"
## [101] "excuse" "exercise" "exist" "expect" "expense"
## [106] "experience" "explain" "express" "extra" "eye"
## [111] "idea" "identify" "if" "imagine" "important"
## [116] "improve" "in" "include" "income" "increase"
## [121] "indeed" "individual" "industry" "inform" "inside"
## [126] "instead" "insure" "interest" "into" "introduce"
## [131] "invest" "involve" "issue" "it" "item"
## [136] "obvious" "occasion" "odd" "of" "off"
## [141] "offer" "office" "often" "okay" "old"
## [146] "on" "once" "one" "only" "open"
## [151] "operate" "opportunity" "oppose" "or" "order"
## [156] "organize" "original" "other" "otherwise" "ought"
## [161] "out" "over" "own" "under" "understand"
## [166] "union" "unit" "unite" "university" "unless"
## [171] "until" "up" "upon" "use" "usual"
## [176] "year" "yes" "yesterday" "yet" "you"
## [181] "young"
# This call returns the words that start with a vowel (y is counted as a vowel here).
# Keep the words that end in "ing" or "ise".
str_subset(words, pattern = "ing$|ise$")
## [1] "advertise" "bring" "during" "evening" "exercise" "king"
## [7] "meaning" "morning" "otherwise" "practise" "raise" "realise"
## [13] "ring" "rise" "sing" "surprise" "thing"
# ^.*$ : matches any whole string, whether it is made of letters or digits (including the empty string)
# "\\{.+\\}" : matches any text enclosed in curly braces {} that contains at least one character
# \d{4}-\d{2}-\d{2} : matches a date-like string in YYYY-MM-DD format: 4 digits, then "-", then 2 digits, then "-", then 2 digits
# "\\\\{4}" : matches four consecutive backslashes (\\\\)
# Words that start with three consonants.
str_subset(words, pattern = "^[^aeiouy]{3}")
## [1] "Christ" "Christmas" "mrs" "scheme" "school" "straight"
## [7] "strategy" "street" "strike" "strong" "structure" "three"
## [13] "through" "throw"
# Words that have three or more vowels in a row.
str_subset(words, pattern = "[aeiouy]{3,}")
## [1] "beauty" "eye" "obvious" "previous" "quiet" "serious"
## [7] "various" "year" "you" "young"
# Words that have two or more vowel-consonant pairs in a row.
str_subset(words, pattern = "([aeiouy][^aeiouy]){2,}")
## [1] "absolute" "agent" "along" "america" "another"
## [6] "apart" "apparent" "authority" "available" "aware"
## [11] "balance" "basis" "become" "before" "begin"
## [16] "behind" "benefit" "business" "character" "closes"
## [21] "community" "consider" "cover" "debate" "decide"
## [26] "decision" "definite" "department" "depend" "design"
## [31] "develop" "difference" "difficult" "direct" "divide"
## [36] "document" "during" "economy" "educate" "elect"
## [41] "electric" "eleven" "encourage" "environment" "europe"
## [46] "even" "evening" "ever" "every" "evidence"
## [51] "exact" "example" "exercise" "exist" "family"
## [56] "figure" "final" "finance" "finish" "future"
## [61] "general" "govern" "holiday" "honest" "hospital"
## [66] "however" "identify" "imagine" "individual" "interest"
## [71] "introduce" "item" "jesus" "level" "likely"
## [76] "limit" "local" "major" "manage" "meaning"
## [81] "measure" "minister" "minus" "minute" "moment"
## [86] "music" "nature" "necessary" "never" "notice"
## [91] "open" "operate" "opportunity" "organize" "original"
## [96] "over" "paper" "paragraph" "parent" "particular"
## [101] "photograph" "police" "policy" "politic" "position"
## [106] "positive" "power" "prepare" "present" "presume"
## [111] "private" "probable" "process" "produce" "product"
## [116] "project" "proper" "propose" "protect" "provide"
## [121] "quality" "realise" "reason" "recent" "recognize"
## [126] "recommend" "record" "reduce" "refer" "regard"
## [131] "relation" "remember" "report" "represent" "result"
## [136] "return" "saturday" "second" "secretary" "secure"
## [141] "separate" "seven" "similar" "specific" "strategy"
## [146] "student" "stupid" "telephone" "television" "therefore"
## [151] "thousand" "together" "tomorrow" "tonight" "total"
## [156] "toward" "travel" "unit" "unite" "university"
## [161] "upon" "visit" "water" "woman"
# (.)\1\1 : matches the same character repeated three times in a row, like aaa
# "(.)(.)\\2\\1" : matches any two characters followed by the same two characters in reverse order, like xyyx
# (..)\1 : matches any two-character sequence that is immediately repeated, like xyxy
# "(.).\\1.\\1" : matches five characters in which the first, third and fifth characters are the same, like a1a2a
# "(.)(.)(.).*\\3\\2\\1" : matches text in which the three starting characters reappear at the end in reverse order, like abc 123456 cba
# Find all words that start or end with x.
# Using a single regular expression:
words[str_detect(words, pattern = "^x|x$")]
## [1] "box" "sex" "six" "tax"
# Using multiple str_detect() calls combined with `|` (either condition):
strt <- str_detect(words, pattern = "^x")
end <- str_detect(words, pattern = "x$")
words[strt | end]
## [1] "box" "sex" "six" "tax"
# Find all words that start with a vowel and end with a consonant.
# Using a single regular expression: a leading vowel, anything in between,
# and a final character that is not a vowel.
words[str_detect(words, pattern = "^[aeiou].*[^aeiou]$")]
## [1] "about" "accept" "account" "across" "act"
## [6] "actual" "add" "address" "admit" "affect"
## [11] "afford" "after" "afternoon" "again" "against"
## [16] "agent" "air" "all" "allow" "almost"
## [21] "along" "already" "alright" "although" "always"
## [26] "amount" "and" "another" "answer" "any"
## [31] "apart" "apparent" "appear" "apply" "appoint"
## [36] "approach" "arm" "around" "art" "as"
## [41] "ask" "at" "attend" "authority" "away"
## [46] "awful" "each" "early" "east" "easy"
## [51] "eat" "economy" "effect" "egg" "eight"
## [56] "either" "elect" "electric" "eleven" "employ"
## [61] "end" "english" "enjoy" "enough" "enter"
## [66] "environment" "equal" "especial" "even" "evening"
## [71] "ever" "every" "exact" "except" "exist"
## [76] "expect" "explain" "express" "identify" "if"
## [81] "important" "in" "indeed" "individual" "industry"
## [86] "inform" "instead" "interest" "invest" "it"
## [91] "item" "obvious" "occasion" "odd" "of"
## [96] "off" "offer" "often" "okay" "old"
## [101] "on" "only" "open" "opportunity" "or"
## [106] "order" "original" "other" "ought" "out"
## [111] "over" "own" "under" "understand" "union"
## [116] "unit" "university" "unless" "until" "up"
## [121] "upon" "usual"
# Using multiple str_detect() calls.
# A word qualifies only when it starts with a vowel AND ends with a
# consonant, so the two logical vectors are combined with `&`. Combining them
# with `|` would return every word that satisfies either condition.
strt1 <- str_detect(words, "^[aeiou]")
end1 <- str_detect(words, "[^aeiou]$")
words[strt1 & end1]
## [1] "about" "accept" "account" "across" "act"
## [6] "actual" "add" "address" "admit" "affect"
## [11] "afford" "after" "afternoon" "again" "against"
## [16] "agent" "air" "all" "allow" "almost"
## [21] "along" "already" "alright" "although" "always"
## [26] "amount" "and" "another" "answer" "any"
## [31] "apart" "apparent" "appear" "apply" "appoint"
## [36] "approach" "arm" "around" "art" "as"
## [41] "ask" "at" "attend" "authority" "away"
## [46] "awful" "each" "early" "east" "easy"
## [51] "eat" "economy" "effect" "egg" "eight"
## [56] "either" "elect" "electric" "eleven" "employ"
## [61] "end" "english" "enjoy" "enough" "enter"
## [66] "environment" "equal" "especial" "even" "evening"
## [71] "ever" "every" "exact" "except" "exist"
## [76] "expect" "explain" "express" "identify" "if"
## [81] "important" "in" "indeed" "individual" "industry"
## [86] "inform" "instead" "interest" "invest" "it"
## [91] "item" "obvious" "occasion" "odd" "of"
## [96] "off" "offer" "often" "okay" "old"
## [101] "on" "only" "open" "opportunity" "or"
## [106] "order" "original" "other" "ought" "out"
## [111] "over" "own" "under" "understand" "union"
## [116] "unit" "university" "unless" "until" "up"
## [121] "upon" "usual"
# Are there any words that contain at least one of each different vowel?
# Build one logical vector per vowel and AND them together element-wise.
words[
  Reduce(
    `&`,
    lapply(
      c("a", "e", "i", "o", "u"),
      function(vowel) str_detect(words, vowel)
    )
  )
]
## character(0)
# 1. Extract the first word of each sentence: a letter followed by any number
#    of letters or apostrophes. head() shows the first six results.
str_extract(sentences, "[A-Za-z][A-Za-z']*") %>%
  head()
## [1] "The" "Glue" "It's" "These" "Rice" "The"
# 2. Extract the distinct words that end in "ing".
p <- "\\b[A-Za-z]+ing\\b"
# Flag the sentences that contain at least one such word.
finish_ing <- str_detect(sentences, p)
# Pull every match out of those sentences, flatten the list of matches and
# drop duplicates; head() shows the first six results.
unique(unlist(str_extract_all(sentences[finish_ing], p))) %>%
  head()
## [1] "spring" "evening" "morning" "winding" "living" "king"
# 3. Extract the distinct plural-looking words: at least three letters
#    followed by a final "s". Search every sentence, not only the sentences
#    that contain an "-ing" word. head() shows the first six results.
unique(unlist(str_extract_all(sentences, "\\b[A-Za-z]{3,}s\\b"))) %>%
  head()
## [1] "planks" "days" "bowls" "lemons" "makes" "hogs"
# A number word (one to ten) followed by one or more spaces and the next word.
numb_word <- "\\b(one|two|three|four|five|six|seven|eight|nine|ten) +(\\w+)"
# Keep only the sentences that contain such a pair, then extract the first
# pair from each of them.
str_extract(sentences[str_detect(sentences, numb_word)], numb_word)
## [1] "seven books" "two met" "two factors" "three lists"
## [5] "seven is" "two when" "ten inches" "one war"
## [9] "one button" "six minutes" "ten years" "two shares"
## [13] "two distinct" "five cents" "two pins" "five robins"
## [17] "four kinds" "three story" "three inches" "six comes"
## [21] "three batches" "two leaves"
# A contraction or possessive: letters, an apostrophe, then more letters. The
# two groups capture the parts before and after the apostrophe.
cont <- "([A-Za-z]+)'([A-Za-z]+)"
# For each sentence containing a match, str_match() returns the full match
# in column 1 and the two captured parts in columns 2 and 3.
str_match(sentences[str_detect(sentences, cont)], cont)
## [,1] [,2] [,3]
## [1,] "It's" "It" "s"
## [2,] "man's" "man" "s"
## [3,] "don't" "don" "t"
## [4,] "store's" "store" "s"
## [5,] "workman's" "workman" "s"
## [6,] "Let's" "Let" "s"
## [7,] "sun's" "sun" "s"
## [8,] "child's" "child" "s"
## [9,] "king's" "king" "s"
## [10,] "It's" "It" "s"
## [11,] "don't" "don" "t"
## [12,] "queen's" "queen" "s"
## [13,] "don't" "don" "t"
## [14,] "don't" "don" "t"
## [15,] "don't" "don" "t"
## [16,] "don't" "don" "t"
## [17,] "pirate's" "pirate" "s"
## [18,] "neighbor's" "neighbor" "s"
# Replace every forward slash with a backslash. The replacement is written
# "\\\\" in R source; the printed result shows each single backslash in its
# escaped form "\\".
str_replace_all(
  "test/test/test/test",
  pattern = "/",
  replacement = "\\\\"
)
## [1] "test\\test\\test\\test"
# Lookup table mapping each upper-case letter (the names) to its lower-case
# form (the values), built from the LETTERS and letters constants.
# NOTE: this variable is named `rep`, the same name as the base function rep().
rep <- setNames(letters, LETTERS)
# Apply the named replacement vector: each name is a pattern and each value
# is its replacement.
lowercase_words <- str_replace_all(string = words, pattern = rep)
# Show the first six converted words.
head(lowercase_words, n = 6)
## [1] "a" "able" "about" "absolute" "accept" "account"
# Split a comma-separated list into its items. The separator is a comma and
# one or more spaces, optionally followed by "and" and more spaces.
# str_split() returns a list, so [[1]] takes the pieces of the single input.
str_split("apples, pears, and bananas", pattern = ", +(and +)?")[[1]]
## [1] "apples" "pears" "bananas"
# Compare three ways of splitting the same sentence.
test_sent <- "this car has been boosted and now develop a whopping '760hp'"
# Splitting on a single space keeps punctuation attached to the words.
str_split(test_sent, pattern = " ")
## [[1]]
## [1] "this" "car" "has" "been" "boosted" "and"
## [7] "now" "develop" "a" "whopping" "'760hp'"
# Splitting on word boundaries drops the surrounding quote marks.
str_split(test_sent, pattern = boundary(type = "word"))
## [[1]]
## [1] "this" "car" "has" "been" "boosted" "and"
## [7] "now" "develop" "a" "whopping" "760hp"
# Splitting on the empty string yields the individual characters.
str_split(test_sent, pattern = "")
## [[1]]
## [1] "t" "h" "i" "s" " " "c" "a" "r" " " "h" "a" "s" " " "b" "e" "e" "n" " " "b"
## [20] "o" "o" "s" "t" "e" "d" " " "a" "n" "d" " " "n" "o" "w" " " "d" "e" "v" "e"
## [39] "l" "o" "p" " " "a" " " "w" "h" "o" "p" "p" "i" "n" "g" " " "'" "7" "6" "0"
## [58] "h" "p" "'"
# Keep the strings that contain a backslash.
# As a regular expression, one literal backslash is written "\\\\".
str_subset(c("te\\st", "test"), pattern = "\\\\")
## [1] "te\\st"
# With fixed() the pattern is compared literally, so "\\" is enough.
str_subset(c("te\\st", "test"), pattern = fixed("\\"))
## [1] "te\\st"