library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.4
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
# Strings may be delimited with either double or single quotes.
string1 <- "This is a string"
string2 <- 'If I want to include a "quote" inside a string, I use single quotes'

# A quote of the same kind as the delimiter has to be backslash-escaped.
double_quote <- "\""  # same value as '"'
single_quote <- '\''  # same value as "'"

# Printing a character vector shows the escaped representation ...
x <- c(double_quote, "\\")
x
#> [1] "\"" "\\"

# ... whereas writeLines() shows the raw contents, one element per line.
writeLines(x)
#> "
#> \

# Non-ASCII characters can be written with a \u escape.
x <- "\u00b5"
x
#> [1] "µ"
# c() builds a character vector.
c("one", "two", "three")
#> [1] "one" "two" "three"

# str_length() counts characters element by element; NA stays NA.
str_length(c("a", "R for data science", NA))
#> [1]  1 18 NA

# str_c() concatenates its arguments; `sep` is inserted between them.
str_c("x", "y")
#> [1] "xy"
str_c("x", "y", "z")
#> [1] "xyz"
str_c("x", "y", sep = ", ")
#> [1] "x, y"

# A missing value makes the whole combined element missing ...
x <- c("abc", NA_character_)
str_c("|-", x, "-|")
#> [1] "|-abc-|" NA
# ... unless str_replace_na() first turns it into the string "NA".
str_c("|-", str_replace_na(x), "-|")
#> [1] "|-abc-|" "|-NA-|"

# Length-one arguments are recycled along the longer vector.
str_c("prefix-", c("a", "b", "c"), "-suffix")
#> [1] "prefix-a-suffix" "prefix-b-suffix" "prefix-c-suffix"

# An `if` without `else` yields NULL when its condition is FALSE; as the
# output shows, str_c() simply skips that argument.
name <- "Hadley"
time_of_day <- "morning"
birthday <- FALSE
str_c(
  "Good ", time_of_day, " ", name,
  if (birthday) " and HAPPY BIRTHDAY",
  "."
)
#> [1] "Good morning Hadley."

# `collapse` joins the elements of a vector into one single string.
str_c(c("x", "y", "z"), collapse = ", ")
#> [1] "x, y, z"
# str_sub() extracts substrings by inclusive start and end position.
# (The example was previously run twice in a row; the verbatim repeat and its
# unterminated output comment have been removed.)
x <- c("Apple", "Banana", "Pear")
str_sub(x, 1, 3)
#> [1] "App" "Ban" "Pea"

# Negative numbers count backwards from the end of each string.
str_sub(x, -3, -1)
#> [1] "ple" "ana" "ear"

# The assignment form of str_sub() modifies the strings in place: here the
# first character of every element is replaced by its lower-case version.
str_sub(x, 1, 1) <- str_to_lower(str_sub(x, 1, 1))
x
#> [1] "apple" "banana" "pear"
# Case conversion depends on the locale. Turkish has two i's, with and
# without a dot, and capitalises them by a different rule.
dotted_and_dotless_i <- c("i", "ı")
str_to_upper(dotted_and_dotless_i)
#> [1] "I" "I"
str_to_upper(dotted_and_dotless_i, locale = "tr")
#> [1] "İ" "I"
# NOTE(review): the knitted run recorded [1] "I" "I" for the Turkish call as
# well -- presumably the dotted capital was lost by the console encoding;
# confirm on the target machine.

# Sort order depends on the locale too.
x <- c("apple", "eggplant", "banana")
str_sort(x, locale = "en")   # English
#> [1] "apple" "banana" "eggplant"
str_sort(x, locale = "haw")  # Hawaiian
#> [1] "apple" "eggplant" "banana"
# 14.2.5 Exercises
# 1. In code that doesn't use stringr, you'll often see paste() and paste0(). What's the difference between the two functions? What stringr function are they equivalent to? How do the functions differ in their handling of NA?
# paste() separates values with a space by default, whereas paste0() adds no separator. Both commands are similar to str_c(), which, like paste0(), uses no separator by default; however, they handle NA differently: paste() and paste0() convert NA to the string "NA", whereas str_c() returns NA for any element that involves a missing value.
# 2. In your own words, describe the difference between the sep and collapse arguments to str_c().
# The sep argument puts a separator such as "-" between the items being joined, whereas collapse combines the elements of the resulting vector into a single string.
# paste() separates its arguments with a space by default; paste0() uses no
# separator. Length-one arguments are recycled along the longer vector.
paste("a", "b", "c", c("x", "y"))
## [1] "a b c x" "a b c y"
paste0("a", "b", "c", c("x", "y"))
## [1] "abcx" "abcy"

# str_c() is the stringr counterpart of paste0().
# The earlier version combined vectors of length 3 and 2 here; that only
# produced a recycling warning under stringr 1.4.0 but is an error under
# stringr >= 1.5.0 (only length-one vectors are recycled), which would stop
# the script. The same inputs as the paste0() call are used instead.
str_c("a", "b", "c", c("x", "y"))
## [1] "abcx" "abcy"

# NA handling differs: paste() turns NA into the text "NA", whereas str_c()
# returns NA for the affected element.
paste(c("a", "b"), c(NA, "y"))
## [1] "a NA" "b y"
str_c(c("a", "b"), c(NA, "y"))
## [1] NA   "by"

# `collapse` merges the elements of one vector into a single string.
str_c(c("a", "b", "c"), collapse = "")
## [1] "abc"
# 3. Use str_length() and str_sub() to extract the middle character from a
#    string. What will you do if the string has an even number of characters?
# Answer: take the character at position ceiling(length / 2). For an odd
# length that is the exact middle; for an even length it is the left one of
# the two middle characters.
str_length("exercise")
## [1] 8
x <- "exercise"
middle <- ceiling(str_length(x) / 2)
str_sub(x, start = middle, end = middle)
## [1] "r"
# 4. What does str_wrap() do? When might you want to use it?
# str_wrap() re-flows text into lines of a given width, which is helpful when
# formatting paragraphs of long text.
writeLines(str_wrap("This is a test.", width = 3, indent = 3, exdent = 8))
## This
## is
## a
## test.
# NOTE(review): the recorded output shows no leading whitespace although
# indent = 3 and exdent = 8 were requested -- presumably the indentation was
# collapsed when the output was pasted; confirm by re-running.

# 5. What does str_trim() do?
# It removes whitespace from the start and/or the end of a string; with
# side = "left" only the leading whitespace is removed.
str_trim(" this is a test ", side = "left")
## [1] "this is a test "
# 6. Write a function that turns (e.g.) a vector c("a", "b", "c") into the
#    string "a, b, and c". Think carefully about what it should do if given a
#    vector of length 0, 1, or 2.

# Join the elements of a character vector into an English-style list.
#
# x: a character vector.
# Returns a single string:
#   length 0  -> ""
#   length 1  -> the element itself
#   length 2  -> "a and b" (no comma)
#   length 3+ -> "a, b, and c" (comma-separated, "and" before the last item)
str_commasep <- function(x) {
  n <- length(x)
  if (n == 0) {
    return("")
  }
  if (n == 1) {
    return(x)
  }
  if (n == 2) {
    return(str_c(x[[1]], " and ", x[[2]]))
  }
  # Everything except the last element is comma-separated first, then the
  # last element is appended after ", and ".
  leading <- str_c(x[-n], collapse = ", ")
  str_c(leading, ", and ", x[[n]])
}

str_commasep(character())
## [1] ""
str_commasep("a")
## [1] "a"
str_commasep(c("a", "b"))
## [1] "a and b"
str_commasep(c("a", "b", "c"))
## [1] "a, b, and c"
# 14.3.1.1 Exercises
# 1. Explain why each of these strings doesn't match a \: "\", "\\", "\\\".
#    "\"   : the backslash escapes the next character in the string.
#    "\\"  : inserts a single backslash.
#    "\\\" : the first two backslashes insert a backslash; the third escapes the next character.
# 2. How would you match the sequence "'\ ?
# 3. What patterns will the regular expression \..\..\.. match? How would you represent it as a string?
# 14.3.2.1 Exercises
# 1. How would you match the literal string "$^$"? I would put "\\" in front of each of the characters.
# $ and ^ are regex metacharacters, so each one is escaped with a backslash,
# which in turn is written as "\\" inside the R string.
x <- "in this exercise we will use $^$"
x %>% str_view("\\$\\^\\$")

# Given the corpus of common words in stringr::words, create regular
# expressions that find all words that:
stringr::words %>% str_view("^y", match = TRUE)        # start with "y"
stringr::words %>% str_view("x$", match = TRUE)        # end with "x"
stringr::words %>% str_view("^...$", match = TRUE)     # are exactly three characters long
stringr::words %>% str_view("^.......$", match = TRUE) # are exactly seven characters long
# NOTE(review): if the exercise asks for "seven letters or more", the
# trailing $ would have to be dropped -- confirm against the exercise text.

# Words that start with a vowel.
stringr::words %>% str_subset("^[aeiou]")
## [1] "a" "able" "about" "absolute" "accept"
## [6] "account" "achieve" "across" "act" "active"
## [11] "actual" "add" "address" "admit" "advertise"
## [16] "affect" "afford" "after" "afternoon" "again"
## [21] "against" "age" "agent" "ago" "agree"
## [26] "air" "all" "allow" "almost" "along"
## [31] "already" "alright" "also" "although" "always"
## [36] "america" "amount" "and" "another" "answer"
## [41] "any" "apart" "apparent" "appear" "apply"
## [46] "appoint" "approach" "appropriate" "area" "argue"
## [51] "arm" "around" "arrange" "art" "as"
## [56] "ask" "associate" "assume" "at" "attend"
## [61] "authority" "available" "aware" "away" "awful"
## [66] "each" "early" "east" "easy" "eat"
## [71] "economy" "educate" "effect" "egg" "eight"
## [76] "either" "elect" "electric" "eleven" "else"
## [81] "employ" "encourage" "end" "engine" "english"
## [86] "enjoy" "enough" "enter" "environment" "equal"
## [91] "especial" "europe" "even" "evening" "ever"
## [96] "every" "evidence" "exact" "example" "except"
## [101] "excuse" "exercise" "exist" "expect" "expense"
## [106] "experience" "explain" "express" "extra" "eye"
## [111] "idea" "identify" "if" "imagine" "important"
## [116] "improve" "in" "include" "income" "increase"
## [121] "indeed" "individual" "industry" "inform" "inside"
## [126] "instead" "insure" "interest" "into" "introduce"
## [131] "invest" "involve" "issue" "it" "item"
## [136] "obvious" "occasion" "odd" "of" "off"
## [141] "offer" "office" "often" "okay" "old"
## [146] "on" "once" "one" "only" "open"
## [151] "operate" "opportunity" "oppose" "or" "order"
## [156] "organize" "original" "other" "otherwise" "ought"
## [161] "out" "over" "own" "under" "understand"
## [166] "union" "unit" "unite" "university" "unless"
## [171] "until" "up" "upon" "use" "usual"
# Words that contain no vowel at all: match = FALSE keeps only the words the
# pattern does NOT match.
stringr::words %>% str_view("[aeiou]", match = FALSE)

# Words that end in "ed" preceded by a character other than "e".
stringr::words %>% str_subset("[^e]ed$")
## [1] "bed" "hundred" "red"

# Words that end in "ing" or "ise".
stringr::words %>% str_subset("i(ng|se)$")
## [1] "advertise" "bring" "during" "evening" "exercise" "king"
## [7] "meaning" "morning" "otherwise" "practise" "raise" "realise"
## [13] "ring" "rise" "sing" "surprise" "thing"

# Words in which "q" is followed by a character other than "u".
stringr::words %>% str_view("q[^u]", match = TRUE)

# Words that contain "lour", "bour" or "parat".
stringr::words %>% str_view("(l|b)our|parat", match = TRUE)
# Exercise 14.3.3.5: create a regular expression that will match telephone
# numbers as commonly written in your country (here: ###-###-####).
# The pattern used to be run against `x`, which at this point still holds the
# "$^$" sentence and contains no telephone number, so nothing was ever
# highlighted. A small sample vector is used instead; only its first and last
# elements contain a number in the expected format.
phone_numbers <- c("123-456-7890", "1234-567-890", "call 555-867-5309 now")
str_view(phone_numbers, "[0-9]{3}-[0-9]{3}-[0-9]{4}")

# Words that start with three non-vowel characters.
str_view(words, "^[^aeiou]{3}", match = TRUE)
# Words that contain three or more vowels in a row.
str_view(words, "[aeiou]{3,}", match = TRUE)
# Words that contain two or more vowel/non-vowel pairs in a row.
str_view(words, "([aeiou][^aeiou]){2,}", match = TRUE)
#14.4.1.1 Exercises For each of the following challenges, try solving it by using both a single regular expression, and a combination of multiple str_detect() calls.
# Find all words that start or end with x.
# (This heading and the one below used to be bare prose lines, which R cannot
# parse; they are now comments.)
words[str_detect(words, "^x")]
## character(0)
words[str_detect(words, "x$")]
## [1] "box" "sex" "six" "tax"

# Find all words that start with a vowel and end with a consonant.
# The vowels must be wrapped in a character class: without the brackets,
# "^aeiou" only matches words beginning with the literal text "aeiou", of
# which there are none.
words[str_detect(words, "^[aeiou]")]
# Words whose last character is not a vowel.
words[str_detect(words, "[^aeiou]$")]
## [1] "about" "accept" "account" "across" "act"
## [6] "actual" "add" "address" "admit" "affect"
## [11] "afford" "after" "afternoon" "again" "against"
## [16] "agent" "air" "all" "allow" "almost"
## [21] "along" "already" "alright" "although" "always"
## [26] "amount" "and" "another" "answer" "any"
## [31] "apart" "apparent" "appear" "apply" "appoint"
## [36] "approach" "arm" "around" "art" "as"
## [41] "ask" "at" "attend" "authority" "away"
## [46] "awful" "baby" "back" "bad" "bag"
## [51] "ball" "bank" "bar" "basis" "bear"
## [56] "beat" "beauty" "bed" "begin" "behind"
## [61] "benefit" "best" "bet" "between" "big"
## [66] "bill" "birth" "bit" "black" "blood"
## [71] "blow" "board" "boat" "body" "book"
## [76] "both" "bother" "bottom" "box" "boy"
## [81] "break" "brief" "brilliant" "bring" "britain"
## [86] "brother" "budget" "build" "bus" "business"
## [91] "busy" "but" "buy" "by" "call"
## [96] "can" "car" "card" "carry" "cat"
## [101] "catch" "cent" "certain" "chair" "chairman"
## [106] "chap" "character" "cheap" "check" "child"
## [111] "Christ" "Christmas" "church" "city" "claim"
## [116] "class" "clean" "clear" "client" "clock"
## [121] "closes" "club" "cold" "collect" "colour"
## [126] "comment" "commit" "common" "community" "company"
## [131] "concern" "condition" "confer" "consider" "consult"
## [136] "contact" "contract" "control" "cook" "copy"
## [141] "corner" "correct" "cost" "could" "council"
## [146] "count" "country" "county" "court" "cover"
## [151] "cross" "cup" "current" "cut" "dad"
## [156] "danger" "day" "dead" "deal" "dear"
## [161] "decision" "deep" "department" "depend" "design"
## [166] "detail" "develop" "difficult" "dinner" "direct"
## [171] "discuss" "district" "doctor" "document" "dog"
## [176] "door" "doubt" "down" "draw" "dress"
## [181] "drink" "drop" "dry" "during" "each"
## [186] "early" "east" "easy" "eat" "economy"
## [191] "effect" "egg" "eight" "either" "elect"
## [196] "electric" "eleven" "employ" "end" "english"
## [201] "enjoy" "enough" "enter" "environment" "equal"
## [206] "especial" "even" "evening" "ever" "every"
## [211] "exact" "except" "exist" "expect" "explain"
## [216] "express" "fact" "fair" "fall" "family"
## [221] "far" "farm" "fast" "father" "favour"
## [226] "feed" "feel" "few" "field" "fight"
## [231] "fill" "film" "final" "find" "finish"
## [236] "first" "fish" "fit" "flat" "floor"
## [241] "fly" "follow" "food" "foot" "for"
## [246] "forget" "form" "forward" "four" "friday"
## [251] "friend" "from" "front" "full" "fun"
## [256] "function" "fund" "further" "garden" "gas"
## [261] "general" "germany" "get" "girl" "glass"
## [266] "god" "good" "govern" "grand" "grant"
## [271] "great" "green" "ground" "group" "grow"
## [276] "guess" "guy" "hair" "half" "hall"
## [281] "hand" "hang" "happen" "happy" "hard"
## [286] "head" "health" "hear" "heart" "heat"
## [291] "heavy" "hell" "help" "high" "history"
## [296] "hit" "hold" "holiday" "honest" "hospital"
## [301] "hot" "hour" "how" "however" "hundred"
## [306] "husband" "identify" "if" "important" "in"
## [311] "indeed" "individual" "industry" "inform" "instead"
## [316] "interest" "invest" "it" "item" "jesus"
## [321] "job" "join" "jump" "just" "keep"
## [326] "key" "kid" "kill" "kind" "king"
## [331] "kitchen" "knock" "know" "labour" "lad"
## [336] "lady" "land" "last" "laugh" "law"
## [341] "lay" "lead" "learn" "left" "leg"
## [346] "less" "let" "letter" "level" "light"
## [351] "likely" "limit" "link" "list" "listen"
## [356] "load" "local" "lock" "london" "long"
## [361] "look" "lord" "lot" "low" "luck"
## [366] "lunch" "main" "major" "man" "many"
## [371] "mark" "market" "marry" "match" "matter"
## [376] "may" "mean" "meaning" "meet" "member"
## [381] "mention" "might" "milk" "million" "mind"
## [386] "minister" "minus" "miss" "mister" "moment"
## [391] "monday" "money" "month" "morning" "most"
## [396] "mother" "motion" "mrs" "much" "music"
## [401] "must" "nation" "near" "necessary" "need"
## [406] "never" "new" "news" "next" "night"
## [411] "non" "normal" "north" "not" "now"
## [416] "number" "obvious" "occasion" "odd" "of"
## [421] "off" "offer" "often" "okay" "old"
## [426] "on" "only" "open" "opportunity" "or"
## [431] "order" "original" "other" "ought" "out"
## [436] "over" "own" "pack" "paint" "pair"
## [441] "paper" "paragraph" "pardon" "parent" "park"
## [446] "part" "particular" "party" "pass" "past"
## [451] "pay" "pension" "per" "percent" "perfect"
## [456] "perhaps" "period" "person" "photograph" "pick"
## [461] "plan" "play" "plus" "point" "policy"
## [466] "politic" "poor" "position" "post" "pound"
## [471] "power" "present" "press" "pretty" "previous"
## [476] "print" "problem" "proceed" "process" "product"
## [481] "project" "proper" "protect" "public" "pull"
## [486] "push" "put" "quality" "quarter" "question"
## [491] "quick" "quid" "quiet" "rail" "rather"
## [496] "read" "ready" "real" "really" "reason"
## [501] "recent" "reckon" "recommend" "record" "red"
## [506] "refer" "regard" "region" "relation" "remember"
## [511] "report" "represent" "research" "respect" "rest"
## [516] "result" "return" "rid" "right" "ring"
## [521] "road" "roll" "room" "round" "run"
## [526] "saturday" "say" "school" "scotland" "seat"
## [531] "second" "secretary" "section" "seem" "self"
## [536] "sell" "send" "serious" "set" "seven"
## [541] "sex" "shall" "sheet" "shoot" "shop"
## [546] "short" "should" "show" "shut" "sick"
## [551] "sign" "similar" "sing" "sir" "sister"
## [556] "sit" "six" "sleep" "slight" "slow"
## [561] "small" "social" "society" "son" "soon"
## [566] "sorry" "sort" "sound" "south" "speak"
## [571] "special" "specific" "speed" "spell" "spend"
## [576] "staff" "stairs" "stand" "standard" "start"
## [581] "station" "stay" "step" "stick" "still"
## [586] "stop" "story" "straight" "strategy" "street"
## [591] "strong" "student" "study" "stuff" "stupid"
## [596] "subject" "succeed" "such" "sudden" "suggest"
## [601] "suit" "summer" "sun" "sunday" "supply"
## [606] "support" "switch" "system" "talk" "tax"
## [611] "teach" "team" "television" "tell" "ten"
## [616] "tend" "term" "test" "than" "thank"
## [621] "then" "they" "thing" "think" "thirteen"
## [626] "thirty" "this" "though" "thousand" "through"
## [631] "throw" "thursday" "today" "together" "tomorrow"
## [636] "tonight" "top" "total" "touch" "toward"
## [641] "town" "traffic" "train" "transport" "travel"
## [646] "treat" "trust" "try" "tuesday" "turn"
## [651] "twenty" "under" "understand" "union" "unit"
## [656] "university" "unless" "until" "up" "upon"
## [661] "usual" "various" "very" "view" "visit"
## [666] "wait" "walk" "wall" "want" "war"
## [671] "warm" "wash" "watch" "water" "way"
## [676] "wear" "wednesday" "week" "weigh" "well"
## [681] "west" "what" "when" "whether" "which"
## [686] "why" "will" "win" "wind" "window"
## [691] "wish" "with" "within" "without" "woman"
## [696] "wonder" "wood" "word" "work" "world"
## [701] "worry" "worth" "would" "wrong" "year"
## [706] "yes" "yesterday" "yet" "young"
library("stringi")
# Number of words in each element of `sentences` (presumably the
# stringr::sentences corpus -- it is not defined in this script).
stri_count_words(sentences)
## [1] 8 8 9 9 7 7 8 8 7 8 8 8 10 7 8 9 6 7 8 8 9 9 10 8 9
## [26] 7 7 8 8 6 7 8 8 7 7 8 7 7 8 6 9 9 7 8 8 7 9 7 7 5
## [51] 7 8 6 9 9 7 7 7 7 8 9 8 8 8 7 7 10 10 9 10 9 8 8 8 7
## [76] 7 9 8 9 8 7 8 7 8 7 7 6 6 6 8 7 9 8 9 7 8 8 8 6 7
## [101] 7 7 7 8 9 6 8 7 7 7 10 8 7 8 7 7 8 10 7 7 6 9 9 7 7
## [126] 8 9 9 8 7 9 7 6 7 10 7 9 8 8 9 7 7 7 7 7 5 8 10 10 9
## [151] 8 8 7 9 8 6 9 8 7 9 10 8 8 8 8 8 8 9 7 9 6 7 8 9 6
## [176] 8 7 8 9 8 6 7 7 9 7 8 7 7 10 8 9 9 7 8 10 7 9 8 7 8
## [201] 9 9 9 8 7 9 7 8 9 8 8 10 8 6 6 7 9 8 8 9 7 9 7 8 10
## [226] 8 8 7 8 7 8 8 9 10 8 7 6 8 7 8 10 9 10 7 8 7 9 7 6 8
## [251] 8 10 6 8 7 8 7 7 7 5 8 10 9 8 6 7 7 8 7 8 10 8 8 7 9
## [276] 8 8 7 8 8 9 9 7 8 8 7 11 7 7 7 9 8 8 8 9 9 8 9 11 7
## [301] 7 9 8 6 8 10 9 9 7 8 8 8 9 8 8 9 9 5 7 7 7 8 8 8 11
## [326] 8 6 7 7 8 7 9 8 8 7 9 8 9 9 7 10 8 11 8 7 9 8 8 9 5
## [351] 9 5 9 8 9 8 9 6 7 9 7 8 10 8 9 7 7 8 7 7 9 8 7 8 6
## [376] 8 6 9 6 7 8 7 6 6 6 9 6 8 7 9 7 7 8 9 7 8 8 6 8 7
## [401] 8 7 6 8 9 8 8 10 7 7 6 7 6 10 6 7 8 10 10 8 8 7 7 9 9
## [426] 8 9 9 6 7 9 7 9 9 7 7 9 8 6 7 8 9 7 8 6 9 8 8 8 8
## [451] 7 8 9 7 8 8 8 11 9 9 7 9 8 9 9 9 8 7 8 8 8 7 6 8 9
## [476] 9 9 8 8 7 10 9 8 8 10 8 10 7 8 11 10 9 7 8 8 9 9 6 10 8
## [501] 7 7 8 9 9 6 9 11 8 10 7 9 8 9 8 8 8 7 9 8 7 5 7 8 9
## [526] 6 8 9 6 7 10 8 8 8 8 8 8 8 8 10 7 11 7 10 9 11 8 10 6 9
## [551] 7 7 7 8 7 10 8 10 8 9 8 7 6 8 10 9 9 7 9 8 9 9 12 11 9
## [576] 8 7 10 10 8 8 10 8 7 8 10 6 8 7 6 7 8 7 10 7 7 5 9 8 8
## [601] 8 9 10 9 7 10 6 7 9 7 7 8 9 11 8 7 7 8 9 9 9 10 10 9 9
## [626] 11 9 9 6 7 8 11 9 9 8 9 9 5 6 8 8 6 8 10 8 8 8 8 8 8
## [651] 8 9 8 8 11 9 9 9 10 7 5 8 7 10 9 8 8 10 8 8 7 10 7 7 7
## [676] 7 7 8 8 8 9 7 6 8 9 8 9 8 7 8 9 12 8 9 9 9 9 8 9 11
## [701] 7 7 8 6 8 8 8 8 8 7 8 7 8 9 6 8 7 7 6 7
# stri_duplicated() flags every element that already occurred earlier in the
# vector.
tokens <- c("this", "this", "is", "a", "test", "a", "test", "this", "is")
stri_duplicated(tokens)
## [1] FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE

# The next two calls produce random output; the recorded lines are from one
# particular run.
# One random string of 16 characters.
stri_rand_strings(n = 1, length = 16)
## [1] "quoWbAh5m5scIdmU"
# A random permutation of the characters of the string.
stri_rand_shuffle("This is test for this exercise.")
## [1] "et ixsT.ef rti sseehhsitcrsi o"