# sep: the string inserted between the separate arguments passed to str_c()
str_c("a", "b", "c", sep = ", ")
## [1] "a, b, c"
# collapse: the string used to flatten a single vector into one string
str_c(c("a", "b", "c"), collapse = ", ")
## [1] "a, b, c"
# Odd number of characters: extract the single middle character.
x <- "honey bee"
mid_x <- (str_length(x) + 1) / 2
str_sub(x, start = mid_x, end = mid_x)
## [1] "y"
# Even number of characters: extract the two characters around the centre.
y <- "bumble bee"
mid_y <- str_length(y) / 2
str_sub(y, start = mid_y, end = mid_y + 1)
## [1] "le"
str_warp():指定の文字数で改行する.
str_trim():whitespaceを文字列の始まりと終わりから削除する.
str_pad():str_trim()の逆.
# Join the elements of a character vector into an English-style list.
#
# x: a character vector.
# Returns a single string:
#   - length 0 or 1: the input collapsed with str_c() (one element is
#     returned as is);
#   - length 2:      "a and b" (no comma between just two items);
#   - length >= 3:   "a, b, and c".
q6 <- function(x) {
  len <- length(x)
  if (len <= 1) {
    return(str_c(x, collapse = ", "))
  }
  if (len == 2) {
    # Two items are joined by "and" alone; ", and" is only used for 3+ items.
    return(str_c(x[1], x[2], sep = " and "))
  }
  # Comma-separate everything but the last element, then attach the last one.
  str_c(str_c(x[-len], collapse = ", "), x[len], sep = ", and ")
}
q6(c(""))
## [1] ""
q6(c("a"))
## [1] "a"
q6(c("a", "b"))
## [1] "a and b"
q6(c("a", "b", "c"))
## [1] "a, b, and c"
\: "\", "\\", "\\\".\はescapeなので, \をescapeするには, \\とする必要があるが,
\\は\と認識されるので、\\\\とする必要がある.
"'\?x <- "a \"'\\b"
# Print the contents of x with the string escapes resolved.
writeLines(x)
## a "'\b
# Pattern for the sequence "'\ : the double quote is escaped for the string
# literal, and one literal backslash in the regex takes four in the string.
str_view(x, "\"'\\\\")
\..\..\.. match? How would you represent it as a string?.◯.◯.◯のような文字列とマッチする.
# "\\..\\..\\.." is the string form of the regex \..\..\.. : three repetitions
# of a literal dot followed by any single character.
x <- "a.b.c.d"
str_view(x, "\\..\\..\\..")
# How would you match the literal string "$^$"?
# The question text used to be fused onto the assignment line, which made R
# parse it as a `?` help call and left x unassigned; it is now a comment.
# $, ^ and $ are regex metacharacters, so each one is escaped.
x <- "a$^$a"
str_view(x, "\\$\\^\\$")
1: Start with “y”.
# Keep only the words whose first character is "y".
words %>% str_view("^y", match = TRUE)
2: End with “x”
# Keep only the words whose last character is "x".
words %>% str_view("x$", match = TRUE)
3: Are exactly three letters long. (Don’t cheat by using str_length()!)
# Anchor both ends so the whole string must be exactly three characters;
# "\\b...\\b" would also accept a three-character word inside a longer string.
str_view(words, "^...$", match = TRUE)
4: Have seven letters or more.
# Seven consecutive characters anywhere in the word means length >= 7.
words %>% str_view(".......", match = TRUE)
1: Start with a vowel.
# Example words; only those beginning with a vowel are shown.
x <- c("apple", "banana")
x %>% str_view("^[aiueo]", match = TRUE)
2: That only contain consonants. (Hint: thinking about matching “not”-vowels.)
# Every character from the start (^) to the end ($) must be a non-vowel.
# The previous pattern "^[^aiueo]" only tested the first character, so it
# matched any word that merely starts with a consonant (e.g. "banana").
# "dry" is added as an example that actually consists of consonants only.
x <- c("apple", "banana", "dry")
str_view(x, "^[^aiueo]+$", match = TRUE)
3: End with ed, but not with eed.
# "ed" at the end of the string, preceded either by the start of the string
# or by any character other than "e". The previous pattern "^[e]ed$" matched
# only the exact string "eed", the opposite of what is asked.
x <- c("seed", "played")
str_view(x, "(^|[^e])ed$", match = TRUE)
4: End with ing or ise.
# Alternation between the two endings, anchored once at the end of the string.
# The previous pattern "ing$ise$" required text after an end-of-string anchor
# and therefore could never match.
x <- c("eating", "noise")
str_view(x, "(ing|ise)$")
# Words in which "q" is followed by a character other than "u".
str_view(words, "q[^u]", match = TRUE)
# Regex string for three hyphen-separated digit groups:
# 2-3 digits, then 3-4 digits, then exactly 4 digits.
"(\\d\\d\\d|\\d\\d)-(\\d\\d\\d|\\d\\d\\d\\d)-(\\d\\d\\d\\d)"
?, +, * in {m,n} form.? : {0, 1}
+ : {1, }
* : {0, }
1: ^.*$ -> 全ての文字列
2: "\\{.+\\}" -> {}に挟まれた1文字以上の文字列
3: \d{4}-\d{2}-\d{2} -> 4桁の数字 - 2桁の数字 - 2桁の数字 4: "\\\\{4}" -> \\\\
1: Start with three consonants.
# Words whose first three characters are all non-vowels.
words %>% str_view("^[^aiueo]{3}", match = TRUE)
2: Have three or more vowels in a row.
# {3,} = three or more repetitions. The interval must not contain whitespace:
# "{3, }" is not a valid ICU quantifier.
str_view(words, "[aiueo]{3,}", match = TRUE)
3: Have two or more vowel-consonant pairs in a row.
# Two or more consecutive vowel-consonant pairs. The interval must not
# contain whitespace: "{2, }" is not a valid ICU quantifier.
str_view(words, "([aiueo][^aiueo]){2,}", match = TRUE)
1: (.)\1\1 -> 同じ文字が3回連続.
2: "(.)(.)\\2\\1" -> 2文字が逆の順番で連続. 3: (..)\1 -> 同じ2文字が2回連続.
4: "(.).\\1.\\1" -> abacaとかxyxzxみたいな文字列
5: "(.)(.)(.).*\\3\\2\\1" -> abcfghjcbaとかwxyrtyuiyxwみたいな文字列.
##### 2. Construct regular expressions to match words that:
1: Start and end with the same character.
# Capture the first character (anchored with ^) and require it again at the
# end. Without ^ the captured character could be any character in the word.
# The optional group also accepts single-character words.
str_view(words, "^(.)(.*\\1)?$", match = TRUE)
2: Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
# Words in which some two-character sequence occurs again later in the word.
words %>% str_view("(..).*\\1", match = TRUE)
3: Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)
# Show the words that DO contain one character in at least three places;
# match = FALSE displayed the non-matching words instead.
str_view(words, "(.).*\\1.*\\1", match = TRUE)
1: Find all words that start or end with x.
# single: alternation, so either anchor may match. The previous "^x.*x$"
# required x at both ends.
str_view(words, "^x|x$", match = TRUE)
# combine: OR the two detections together. The previous version filtered by
# "^x" and then by "x$", which is an AND.
starts_with_x <- str_detect(words, "^x")
ends_with_x <- str_detect(words, "x$")
words[starts_with_x | ends_with_x] %>% str_view("^x|x$", match = TRUE)
2: Find all words that start with a vowel and end with a consonant.
# single: one pattern anchored at both ends
words %>% str_view("^[aiueo].*[^aiueo]$", match = TRUE)
# combine: keep the words starting with a vowel, then view the consonant endings
starts_with_vowel <- str_detect(words, "^[aiueo]")
str_view(words[starts_with_vowel], "[^aiueo]$", match = TRUE)
3: Are there any words that contain at least one of each different vowel?
# For every word, count its vowels and compute the share of its characters
# that are vowels.
df <- tibble(word = words) %>%
  mutate(
    number = str_count(word, "[aiueo]"),
    prop = number / str_length(word)
  )
# highest number
df %>% filter(number == max(number))
## # A tibble: 8 x 3
## word number prop
## <chr> <int> <dbl>
## 1 appropriate 5 0.455
## 2 associate 5 0.556
## 3 available 5 0.556
## 4 colleague 5 0.556
## 5 encourage 5 0.556
## 6 experience 5 0.5
## 7 individual 5 0.5
## 8 television 5 0.5
# prop
df %>% filter(prop == max(prop))
## # A tibble: 1 x 3
## word number prop
## <chr> <int> <dbl>
## 1 a 1 1
# Build one alternation pattern that matches any of the colour names.
color <- c("red", "orange", "yellow", "green", "blue", "purple")
color_match <- color %>% str_c(collapse = "|")
1: The first word from each sentence.
# First word of each sentence. The apostrophe is part of the class so that
# contractions such as "It's" are not cut off at the apostrophe.
str_extract(sentences, "^[A-Za-z']+")
2: All words ending in ing.
# Whole words ending in "ing". The \\b boundaries stop the pattern from
# extracting a fragment of a longer word (e.g. "king" out of "kingdom").
str_extract_all(sentences, "\\b[A-Za-z]+ing\\b")
3: All plurals.
# Heuristic for plurals: whole words of at least four letters ending in "s".
# The \\b boundaries stop the pattern from extracting a fragment of a longer
# word (e.g. "firs" out of "first"). Non-plural words ending in "s" still match.
str_extract_all(sentences, "\\b[A-Za-z]{3,}s\\b")