library(tidyverse)

## -- Attaching packages ---------- tidyverse 1.3.0 --

## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0

## -- Conflicts ------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

1.str_c

Join multiple strings into a single string.

用法：
str_c(…, sep = "", collapse = NULL)

sep: String to insert between input vectors.

collapse: Optional string used to combine input vectors into a single string. If collapse = NULL (the default), the result is a character vector with length equal to the longest input. If collapse is non-NULL, the result is a character vector of length 1.

案例1

str_c(letters, collapse = " ")

## [1] "a b c d e f g h i j k l m n o p q r s t u v w x y z"

str_c(letters, collapse = "*")

## [1] "a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p*q*r*s*t*u*v*w*x*y*z"

str_c(letters, collapse = "&")

## [1] "a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z"

str_c(letters, sep  = " ")

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

str_c(letters, sep = "*")

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

str_c(letters, sep = "&")

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

案例2

str_c(c("a", NA, "b"), "-d")

## [1] "a-d" NA    "b-d"

str_c(c("a", NA, "b"), sep="-d")

## [1] "a" NA  "b"

str_c(c("a", NA, "b"), collapse ="-d")

## [1] NA

str_c(c("a","b"), collapse ="-d")

## [1] "a-db"

str_c(c("我","和","你"),"-d")

## [1] "我-d" "和-d" "你-d"

str_c(c("我","和","你"),collapse="-d")

## [1] "我-d和-d你"

案例3

str_c("我","和","你",sep="-")

## [1] "我-和-你"

str_c(c("我","和","你"),sep="-")

## [1] "我" "和" "你"

str_c("我","和","你","-")

## [1] "我和你-"

str_c(c("我","和","你"),"-d")

## [1] "我-d" "和-d" "你-d"

str_c("我","和","你",collapse="-")

## [1] "我和你"

str_c(c("我","和","你"),collapse="-")

## [1] "我-和-你"

总结

collapse 用于把同一个向量内部的各个元素连接成一个字符串，比如连接 c("我","和","你")，代码：str_c(c("我","和","你"), collapse="-")，效果："我-和-你"。sep 用于连接多个输入参数（字符串与字符串）之间对应的元素，比如连接 "我"、"和"、"你"，代码：str_c("我","和","你", sep="-")，效果："我-和-你"。第三种情况，既不加 collapse，也不加 sep，见案例2，代码：str_c(c("我","和","你"), "-d")，效果："我-d" "和-d" "你-d"。

2.str_conv

Specify the encoding of a string.
指定字符串的编码

用法：
str_conv(string, encoding)

案例

x <- rawToChar(as.raw(177))
x

## [1] "\xb1"

str_conv(x, "ISO-8859-2") # Polish "a with ogonek"

## [1] "<U+0105>"

str_conv(x, "ISO-8859-1") # Plus-minus

## [1] "±"

3.str_count

Count the number of matches in a string.
计算字符串中的匹配数。

用法：
str_count(string, pattern = "")

案例1

fruit <- c("apple", "banana", "pear", "pineapple")
str_count(fruit, "a")#第一个单词里面几个a,第二个单词里面几个a，第三个……

## [1] 1 3 1 1

str_count(fruit, "p")

## [1] 2 0 1 3

str_count(fruit, "e")

## [1] 1 0 1 2

str_count(fruit, c("a", "b", "r", "b"))#第一个里面几个a,第二个里面几个b……

## [1] 1 1 1 0

案例2

str_count(c("a.", "...", ".a.a"), ".")

## [1] 2 3 4

str_count(c("a.", "...", ".a.a"), fixed("."))#数点的个数要加上fixed()

## [1] 1 3 2

str_count("This is, suprisingly, a sentence.", boundary("word"))#Match character, word, line and sentence boundaries with boundary(). An empty pattern, "", is equivalent to boundary("character").

## [1] 5

案例3

sent <- c("It's a nice day!")
str_count(sent,"a")

## [1] 2

sent <- c("我和你")
str_count(sent,"我")

## [1] 1

4.str_detect

Detect the presence or absence of a pattern in a string.
检测字符串中是否存在模式。

用法：
str_detect(string, pattern, negate = FALSE)
negate: If TRUE, return non-matching elements.

案例1

fruit <- c("apple", "banana", "pear", "pinapple")
str_detect(fruit, "a")

## [1] TRUE TRUE TRUE TRUE

str_detect(fruit, "^a")#a是否为单词里的第一个

## [1]  TRUE FALSE FALSE FALSE

str_detect(fruit, "a$")#a是否为单词里的最后一个

## [1] FALSE  TRUE FALSE FALSE

str_detect(fruit, "b")

## [1] FALSE  TRUE FALSE FALSE

str_detect(fruit, "[eiou]")#单词中是否含有eiou中任意一个

## [1]  TRUE FALSE  TRUE  TRUE

案例2

str_detect("aecfg", letters) #pattern也可以是向量：依次检测每个字母是否出现在"aecfg"中（注意：交换两个参数的顺序后含义不同）

##  [1]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE

案例3

str_detect(fruit, "^p", negate = TRUE)

## [1]  TRUE  TRUE FALSE FALSE

#negate: If TRUE, return non-matching elements.

5.str_dup

Duplicate and concatenate strings within a character vector.
在字符向量内复制和连接字符串。

用法：
str_dup(string, times)

案例1

fruit <- c("apple", "pear", "banana")
str_dup(fruit, 2)

## [1] "appleapple"   "pearpear"     "bananabanana"

str_dup(fruit, 1:3)

## [1] "apple"              "pearpear"           "bananabananabanana"

案例2

str_c("ba", str_dup("na", 0:5))

## [1] "ba"           "bana"         "banana"       "bananana"     "banananana"  
## [6] "bananananana"

str_dup("ba",1:6) #注意：与上面的代码效果不同，这里重复的是整个"ba"，而上面只重复"na"

## [1] "ba"           "baba"         "bababa"       "babababa"     "bababababa"  
## [6] "babababababa"

str_c("别",str_dup("叨叨",1:3))

## [1] "别叨叨"         "别叨叨叨叨"     "别叨叨叨叨叨叨"

6.str_extract

Extract matching patterns from a string.
从字符串中提取匹配模式。

用法：
str_extract(string, pattern)
str_extract_all(string, pattern, simplify = FALSE)

simplify: If FALSE, the default, returns a list of character vectors. If TRUE returns a character matrix.

案例1

str_extract("wohen","w")

## [1] "w"

str_extract("wohen","we")

## [1] NA

案例2

shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk 2")
str_extract(shopping_list, "\\d")

## [1] "4" NA  NA  "2"

#\:将下一个字符标记为一个特殊字符、或一个原义字符、或一个向后引用、或一个八进制转义符。
#\d:匹配一个数字字符。等价于[0-9]。

str_extract_all(shopping_list, "\\d")

## [[1]]
## [1] "4"
## 
## [[2]]
## character(0)
## 
## [[3]]
## character(0)
## 
## [[4]]
## [1] "2"

str_extract_all(shopping_list, "\\d", simplify = TRUE)

##      [,1]
## [1,] "4" 
## [2,] ""  
## [3,] ""  
## [4,] "2"

str_extract(shopping_list, "[a-z]")#[a-z]   字符范围。匹配指定范围内的任意字符。例如，“[a-z]”可以匹配“a”到“z”范围内的任意小写字母字符。

## [1] "a" "b" "b" "m"

str_extract(shopping_list, "[a-z]+")#每组词组里的第一个字母所在的单词，[a-z]+ 指定的，包括一个或多个字母。

## [1] "apples" "bag"    "bag"    "milk"

str_extract_all(shopping_list, "[a-z]")#选取所有字母

## [[1]]
## [1] "a" "p" "p" "l" "e" "s" "x"
## 
## [[2]]
##  [1] "b" "a" "g" "o" "f" "f" "l" "o" "u" "r"
## 
## [[3]]
##  [1] "b" "a" "g" "o" "f" "s" "u" "g" "a" "r"
## 
## [[4]]
## [1] "m" "i" "l" "k"

str_extract_all(shopping_list, "[a-z]+")# 每组词组里的所有字母所在的单词

## [[1]]
## [1] "apples" "x"     
## 
## [[2]]
## [1] "bag"   "of"    "flour"
## 
## [[3]]
## [1] "bag"   "of"    "sugar"
## 
## [[4]]
## [1] "milk"

str_extract(shopping_list, "[a-z]{1,2}")#每组词组里面第一个字母所在单词取两个字母，注意当为+时，可以理解为取所有

## [1] "ap" "ba" "ba" "mi"

str_extract(shopping_list, "[a-z]{1,4}")#每组词组里面第一个字母所在单词取四个字母

## [1] "appl" "bag"  "bag"  "milk"

案例3（不懂）

str_extract(shopping_list, "\\b[a-z]{1,4}\\b")#\b:匹配一个单词边界，也就是指单词和空格间的位置。例如，“er\b”可以匹配“never”中的“er”，但不能匹配“verb”中的“er”。

## [1] NA     "bag"  "bag"  "milk"

str_extract(shopping_list, "\\b[a-z]{1,2}\\b")

## [1] NA   "of" "of" NA

str_extract(shopping_list, "\\b[a-z]{1,3}\\b")

## [1] NA    "bag" "bag" NA

str_extract(shopping_list, "[a-z]{1,3}\\b")#取第一个单词中的后三个字母

## [1] "les" "bag" "bag" "ilk"

str_extract(shopping_list, "\\b[a-z]{1,4}")#取第一个单词中的前四个字母

## [1] "appl" "bag"  "bag"  "milk"

案例4

str_extract_all(shopping_list, "\\b[a-z]+\\b")

## [[1]]
## [1] "apples"
## 
## [[2]]
## [1] "bag"   "of"    "flour"
## 
## [[3]]
## [1] "bag"   "of"    "sugar"
## 
## [[4]]
## [1] "milk"

str_extract_all(shopping_list, "\\b[a-z]+\\b", simplify = TRUE)

##      [,1]     [,2] [,3]   
## [1,] "apples" ""   ""     
## [2,] "bag"    "of" "flour"
## [3,] "bag"    "of" "sugar"
## [4,] "milk"   ""   ""

案例5

str_extract_all("This is, suprisingly, a sentence.", boundary("word"))

## [[1]]
## [1] "This"        "is"          "suprisingly" "a"           "sentence"

案例6

shp <- c("苹果 x4","糖果包","ipad air 2")

str_extract(shp,"\\d")

## [1] "4" NA  "2"

str_extract(shp, "[a-z]")

## [1] "x" NA  "i"

str_extract(shp, "[a-z]+")

## [1] "x"    NA     "ipad"

str_extract(shp, "[a-z]{1,2}")

## [1] "x"  NA   "ip"

str_extract(shp, "\\b[a-z]{1,3}\\b")

## [1] NA    NA    "air"

str_extract(shp, "\\b[a-z]{1,4}\\b")

## [1] NA     NA     "ipad"

str_extract(shp, "\\b[a-z]{1,3}\\b")

## [1] NA    NA    "air"

str_extract(shp, "\\b[a-z]{1,2}\\b")

## [1] NA NA NA

str_extract(shp, "[a-z]{1,3}\\b")

## [1] NA    NA    "pad"

str_extract(shp, "\\b[a-z]{1,3}")

## [1] "x"   NA    "ipa"

7.str_flatten

Flatten a string 展平字符串。

用法：
str_flatten(string, collapse = "")

案例1

str_flatten(letters)

## [1] "abcdefghijklmnopqrstuvwxyz"

str_flatten(letters, "-")

## [1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"

str_flatten(LETTERS, collapse = "*")

## [1] "A*B*C*D*E*F*G*H*I*J*K*L*M*N*O*P*Q*R*S*T*U*V*W*X*Y*Z"

案例2

str_flatten(c("a","b","c"),",")

## [1] "a,b,c"

8.str_glue

Format and interpolate a string with glue. 用glue格式化和插入字符串。

用法：
str_glue(…, .sep = "", .envir = parent.frame())
str_glue_data(.x, …, .sep = "", .envir = parent.frame(), .na = "NA")

.sep: [character(1): ‘""’] Separator used to separate elements.分隔符用于分隔元素。

.envir: [environment: parent.frame()] Environment to evaluate each expression in. Expressions are evaluated from left to right. If .x is an environment, the expressions are evaluated in that environment and .envir is ignored.用于评估每个表达式的环境。从左到右评估表达式。

.x: [listish] An environment, list or data frame used to lookup values.

.na: [character(1): ‘NA’] Value to replace NA values with. If NULL missing values are propagated, that is an NA result will cause NA output. Otherwise the value is replaced by the value of .na.

案例1

name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")
str_glue(
"My name is {name}, ",
"my age next year is {age + 1}, ",
"and my anniversary is {format(anniversary, '%A, %B %d, %Y')}."
)

## My name is Fred, my age next year is 51, and my anniversary is 星期六, 十月 12, 1991.

str_glue("My name is {name}, not {{name}}.")

## My name is Fred, not {name}.

str_glue(
"My name is {name}, ",
"and my age next year is {age + 1}.",
name = "Joe",
age = 40
)

## My name is Joe, and my age next year is 41.

案例2

x <- "张三"
y <- "北风网"
z <- "北京海定"
str_glue(x,y,z)

## 张三北风网北京海定

str_glue(x,y,z,.sep = "-")

## 张三-北风网-北京海定

note: str_glue_data() is useful in data pipelines
mtcars %>% str_glue_data("{rownames(.)} has {hp} hp")

9.str_length

从技术上讲，该函数返回字符串中“代码点”（code point）的数量。一个代码点通常对应一个字符，但并非总是如此。例如，带有变音符号的 u 既可以表示为单个字符，也可以表示为 u 加上变音符号的组合。

用法：
str_length(string)

案例1

str_length(letters)

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

str_length(NA)

## [1] NA

str_length(factor("abc"))

## [1] 3

str_length(c("i", "like", "programming", NA))

## [1]  1  4 11 NA

案例2

u1 <- "\u00fc"
u1

## [1] "ü"

u2 <- stringi::stri_trans_nfd(u1)
u2

## [1] "u<U+0308>"

str_length(u1)

## [1] 1

str_length(u2)

## [1] 2

str_count(u1)

## [1] 1

str_count(u2)

## [1] 1

10.str_locate

Locate the position of patterns in a string.

用法：
str_locate(string, pattern) str_locate_all(string, pattern)

对于str_locate，返回一个整数矩阵：第一列给出匹配的开始位置，第二列给出匹配的结束位置。对于str_locate_all，返回由整数矩阵组成的列表。

str_extract() for a convenient way of extracting matches, stringi::stri_locate() for the underlying implementation.

另见：str_extract() 可以方便地提取匹配项；底层实现见 stringi::stri_locate()。

案例1

fruit <- c("apple", "banana", "pear", "pineapple")
str_locate(fruit, "$")#$:匹配输入字符串的结束位置。

##      start end
## [1,]     6   5
## [2,]     7   6
## [3,]     5   4
## [4,]    10   9

str_locate(fruit, "a")

##      start end
## [1,]     1   1
## [2,]     2   2
## [3,]     3   3
## [4,]     5   5

str_locate(fruit, "e")

##      start end
## [1,]     5   5
## [2,]    NA  NA
## [3,]     2   2
## [4,]     4   4

str_locate(fruit, c("a", "b", "p", "p"))

##      start end
## [1,]     1   1
## [2,]     1   1
## [3,]     1   1
## [4,]     1   1

str_locate_all(fruit, "a")

## [[1]]
##      start end
## [1,]     1   1
## 
## [[2]]
##      start end
## [1,]     2   2
## [2,]     4   4
## [3,]     6   6
## 
## [[3]]
##      start end
## [1,]     3   3
## 
## [[4]]
##      start end
## [1,]     5   5

str_locate_all(fruit, "e")

## [[1]]
##      start end
## [1,]     5   5
## 
## [[2]]
##      start end
## 
## [[3]]
##      start end
## [1,]     2   2
## 
## [[4]]
##      start end
## [1,]     4   4
## [2,]     9   9

str_locate_all(fruit, c("a", "b", "p", "p"))

## [[1]]
##      start end
## [1,]     1   1
## 
## [[2]]
##      start end
## [1,]     1   1
## 
## [[3]]
##      start end
## [1,]     1   1
## 
## [[4]]
##      start end
## [1,]     1   1
## [2,]     6   6
## [3,]     7   7

# Find location of every character
str_locate_all(fruit, "")

## [[1]]
##      start end
## [1,]     1   1
## [2,]     2   2
## [3,]     3   3
## [4,]     4   4
## [5,]     5   5
## 
## [[2]]
##      start end
## [1,]     1   1
## [2,]     2   2
## [3,]     3   3
## [4,]     4   4
## [5,]     5   5
## [6,]     6   6
## 
## [[3]]
##      start end
## [1,]     1   1
## [2,]     2   2
## [3,]     3   3
## [4,]     4   4
## 
## [[4]]
##       start end
##  [1,]     1   1
##  [2,]     2   2
##  [3,]     3   3
##  [4,]     4   4
##  [5,]     5   5
##  [6,]     6   6
##  [7,]     7   7
##  [8,]     8   8
##  [9,]     9   9

案例2

x <- c("张三北风网excel","李四四北风网python")
str_locate(x,"北")

##      start end
## [1,]     3   3
## [2,]     4   4

11.str_match

Extract matched groups from a string.
从字符串中提取匹配的组

用法：
str_match(string, pattern) str_match_all(string, pattern)

对于str_match，返回一个字符矩阵：第一列是完整匹配，其后每个捕获组各占一列。对于str_match_all，返回由字符矩阵组成的列表。

str_extract() to extract the complete match,stringi::stri_match() for the underlying implementation.

案例1（不懂）

strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569",
"387 287 6718", "apple", "233.398.9187 ", "482 952 3315",
"239 923 8115 and 842 566 4692", "Work: 579-499-7527", "$1000",
"Home: 543.355.3679")
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"

str_extract(strings, phone)

##  [1] "219 733 8965" "329-293-8753" NA             "595 794 7569" "387 287 6718"
##  [6] NA             "233.398.9187" "482 952 3315" "239 923 8115" "579-499-7527"
## [11] NA             "543.355.3679"

str_match(strings, phone)

##       [,1]           [,2]  [,3]  [,4]  
##  [1,] "219 733 8965" "219" "733" "8965"
##  [2,] "329-293-8753" "329" "293" "8753"
##  [3,] NA             NA    NA    NA    
##  [4,] "595 794 7569" "595" "794" "7569"
##  [5,] "387 287 6718" "387" "287" "6718"
##  [6,] NA             NA    NA    NA    
##  [7,] "233.398.9187" "233" "398" "9187"
##  [8,] "482 952 3315" "482" "952" "3315"
##  [9,] "239 923 8115" "239" "923" "8115"
## [10,] "579-499-7527" "579" "499" "7527"
## [11,] NA             NA    NA    NA    
## [12,] "543.355.3679" "543" "355" "3679"

str_extract_all(strings, phone)

## [[1]]
## [1] "219 733 8965"
## 
## [[2]]
## [1] "329-293-8753"
## 
## [[3]]
## character(0)
## 
## [[4]]
## [1] "595 794 7569"
## 
## [[5]]
## [1] "387 287 6718"
## 
## [[6]]
## character(0)
## 
## [[7]]
## [1] "233.398.9187"
## 
## [[8]]
## [1] "482 952 3315"
## 
## [[9]]
## [1] "239 923 8115" "842 566 4692"
## 
## [[10]]
## [1] "579-499-7527"
## 
## [[11]]
## character(0)
## 
## [[12]]
## [1] "543.355.3679"

str_match_all(strings, phone)

## [[1]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "219 733 8965" "219" "733" "8965"
## 
## [[2]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "329-293-8753" "329" "293" "8753"
## 
## [[3]]
##      [,1] [,2] [,3] [,4]
## 
## [[4]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "595 794 7569" "595" "794" "7569"
## 
## [[5]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "387 287 6718" "387" "287" "6718"
## 
## [[6]]
##      [,1] [,2] [,3] [,4]
## 
## [[7]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "233.398.9187" "233" "398" "9187"
## 
## [[8]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "482 952 3315" "482" "952" "3315"
## 
## [[9]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "239 923 8115" "239" "923" "8115"
## [2,] "842 566 4692" "842" "566" "4692"
## 
## [[10]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "579-499-7527" "579" "499" "7527"
## 
## [[11]]
##      [,1] [,2] [,3] [,4]
## 
## [[12]]
##      [,1]           [,2]  [,3]  [,4]  
## [1,] "543.355.3679" "543" "355" "3679"

案例2（不懂）

x <- c("<a> <b>", "<a> <>", "<a>", "", NA)

str_match(x, "<(.*?)> <(.*?)>")

##      [,1]      [,2] [,3]
## [1,] "<a> <b>" "a"  "b" 
## [2,] "<a> <>"  "a"  ""  
## [3,] NA        NA   NA  
## [4,] NA        NA   NA  
## [5,] NA        NA   NA

str_match_all(x, "<(.*?)>")

## [[1]]
##      [,1]  [,2]
## [1,] "<a>" "a" 
## [2,] "<b>" "b" 
## 
## [[2]]
##      [,1]  [,2]
## [1,] "<a>" "a" 
## [2,] "<>"  ""  
## 
## [[3]]
##      [,1]  [,2]
## [1,] "<a>" "a" 
## 
## [[4]]
##      [,1] [,2]
## 
## [[5]]
##      [,1] [,2]
## [1,] NA   NA

str_extract(x, "<.*?>")

## [1] "<a>" "<a>" "<a>" NA    NA

str_extract_all(x, "<.*?>")

## [[1]]
## [1] "<a>" "<b>"
## 
## [[2]]
## [1] "<a>" "<>" 
## 
## [[3]]
## [1] "<a>"
## 
## [[4]]
## character(0)
## 
## [[5]]
## [1] NA

案例3

x <- c("a","b","c")

str_match(x,"a")

##      [,1]
## [1,] "a" 
## [2,] NA  
## [3,] NA

str_match(x,"ab")

##      [,1]
## [1,] NA  
## [2,] NA  
## [3,] NA

str_match_all(x,"a")

## [[1]]
##      [,1]
## [1,] "a" 
## 
## [[2]]
##      [,1]
## 
## [[3]]
##      [,1]

str_match_all(x,"ab")

## [[1]]
##      [,1]
## 
## [[2]]
##      [,1]
## 
## [[3]]
##      [,1]

str_extract(x,"a")

## [1] "a" NA  NA

str_extract(x,"ab")

## [1] NA NA NA

str_extract_all(x,"a")

## [[1]]
## [1] "a"
## 
## [[2]]
## character(0)
## 
## [[3]]
## character(0)

str_extract_all(x,"ab")

## [[1]]
## character(0)
## 
## [[2]]
## character(0)
## 
## [[3]]
## character(0)

12.str_order

Order or sort a character vector.排序

用法：
str_order(x, decreasing = FALSE, na_last = TRUE, locale = "en", numeric = FALSE, …)
str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "en", numeric = FALSE, …)

decreasing: A boolean. If FALSE, the default, sorts from lowest to highest; if TRUE sorts from highest to lowest.

na_last: Where should NA go? TRUE at the end, FALSE at the beginning, NA dropped.

locale: In which locale should the sorting occur? Defaults to the English. This ensures that code behaves the same way across platforms.

numeric: If TRUE, will sort digits numerically, instead of as strings.

案例1

str_order(letters)

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26

str_sort(letters)

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

str_order(letters, locale = "haw")

##  [1]  1  5  9 15 21  2  3  4  6  7  8 10 11 12 13 14 16 17 18 19 20 22 23 24 25
## [26] 26

#locale:In which locale should the sorting occur? Defaults to the English. This ensures that code behaves the same way across platforms. 排序应在哪个语言环境中进行？默认为英语。这样可以确保代码在各个平台上的行为相同。

str_sort(letters, locale = "haw")

##  [1] "a" "e" "i" "o" "u" "b" "c" "d" "f" "g" "h" "j" "k" "l" "m" "n" "p" "q" "r"
## [20] "s" "t" "v" "w" "x" "y" "z"

案例2

x <- c("100a10", "100a5", "2b", "2a")
str_sort(x)

## [1] "100a10" "100a5"  "2a"     "2b"

str_order(x)

## [1] 1 2 4 3

str_sort(x, numeric = TRUE)

## [1] "2a"     "2b"     "100a5"  "100a10"

#numeric: If TRUE, will sort digits numerically, instead of as strings.Other options used to control sorting order.

str_order(x,numeric = T)

## [1] 4 3 2 1

案例3

y <- c("good","cook","book","ebook",NA,"edog")
str_sort(y,na_last = F)

## [1] NA      "book"  "cook"  "ebook" "edog"  "good"

str_order(y,na_last = F)

## [1] 5 3 2 4 6 1

13.str_pad

Pad a string.

用法：
str_pad(string, width, side = c("left", "right", "both"), pad = " ")

str_trim() to remove whitespace; str_trunc() to decrease the maximum width of a string. str_trim（）删除空格； str_trunc（）减小字符串的最大宽度。

案例1

rbind(
str_pad("hadley", 30, "left"),
str_pad("hadley", 30, "right"),
str_pad("hadley", 30, "both")
)

##      [,1]                            
## [1,] "                        hadley"
## [2,] "hadley                        "
## [3,] "            hadley            "

str_pad(c("a", "abc", "abcdef"), 10)

## [1] "         a" "       abc" "    abcdef"

str_pad("a", c(5, 10, 20))

## [1] "    a"                "         a"           "                   a"

str_pad("a", 10, pad = c("-", "_", " "))

## [1] "---------a" "_________a" "         a"

# Longer strings are returned unchanged
str_pad("hadley", 3)

## [1] "hadley"

案例2

str_pad("我",10,pad="-")

## [1] "--------我"

str_pad("鲁迅",10,pad="_")

## [1] "______鲁迅"

14.str_remove

Remove matched patterns in a string.

用法：
str_remove(string, pattern) str_remove_all(string, pattern)

Alias for str_replace(string, pattern, ""). 即 str_replace(string, pattern, "") 的别名。

str_replace() for the underlying implementation str_replace（）用于基础实现

案例1

fruits <- c("one apple", "two pears", "three bananas")
str_remove(fruits, "[aeiou]")

## [1] "ne apple"     "tw pears"     "thre bananas"

str_remove_all(fruits, "[aeiou]")

## [1] "n ppl"    "tw prs"   "thr bnns"

案例2

str_remove("我和你","我")

## [1] "和你"

15.str_replace & str_replace_na

Replace matched patterns in a string.

用法：
str_replace(string, pattern, replacement) str_replace_all(string, pattern, replacement)

pattern: Pattern to look for. The default interpretation is a regular expression, as described in stringi::stringi-search-regex. Control options with regex(). Match a fixed string (i.e. by comparing only bytes), using fixed(). This is fast, but approximate. Generally, for matching human text, you’ll want coll() which respects character matching rules for the specified locale.

str_replace_na() to turn missing values into “NA”; stri_replace() for the underlying implementation.

案例1

fruits <- c("one apple", "two pears", "three bananas")
str_replace(fruits, "[aeiou]", "-")#替换每组词中第一个属于【aeiou】的字母

## [1] "-ne apple"     "tw- pears"     "thr-e bananas"

str_replace_all(fruits, "[aeiou]", "-")#替换每组词中每一个属于【aeiou】的字母

## [1] "-n- -ppl-"     "tw- p--rs"     "thr-- b-n-n-s"

str_replace_all(fruits, "[aeiou]", toupper)

## [1] "OnE ApplE"     "twO pEArs"     "thrEE bAnAnAs"

案例2

str_replace_all(fruits, "b", NA_character_)

## [1] "one apple" "two pears" NA

str_replace(fruits, "([aeiou])", "")

## [1] "ne apple"     "tw pears"     "thre bananas"

案例3（不懂）

str_replace(fruits, "([aeiou])", "\\1\\1")#\\1 是对第一个捕获组（括号里匹配到的元音）的反向引用，"\\1\\1" 即把第一个匹配到的元音重复两次

## [1] "oone apple"     "twoo pears"     "threee bananas"

案例4

str_replace(fruits, "[aeiou]", c("1", "2", "3"))

## [1] "1ne apple"     "tw2 pears"     "thr3e bananas"

str_replace(fruits, c("a", "e", "i"), "-")

## [1] "one -pple"     "two p-ars"     "three bananas"

案例5

# If you want to apply multiple patterns and replacements to the same
# string, pass a named vector to pattern.
fruits %>%
str_c(collapse = "---") %>%
str_replace_all(c("one" = "1", "two" = "2", "three" = "3"))

## [1] "1 apple---2 pears---3 bananas"

x <- str_c(fruits,"---")
x

## [1] "one apple---"     "two pears---"     "three bananas---"

str_replace_all(x,c("one" = "1", "two" = "2", "three" = "3"))

## [1] "1 apple---"   "2 pears---"   "3 bananas---"

fruits %>%
str_c(collapse = "---")

## [1] "one apple---two pears---three bananas"

y <- fruits %>%
str_c(collapse = "---")
str_replace_all(y,c("one" = "1", "two" = "2", "three" = "3"))

## [1] "1 apple---2 pears---3 bananas"

案例6

str_replace_na(c(NA, "abc", "def"),"1")

## [1] "1"   "abc" "def"

16.str_split

Split up a string into pieces. 将一串切成小块。

用法：
str_split(string, pattern, n = Inf, simplify = FALSE) str_split_fixed(string, pattern, n)

simplify： If FALSE, the default, returns a list of character vectors. If TRUE returns a character matrix.

For str_split_fixed, a character matrix with n columns. For str_split, a list of character vectors.

案例1

fruits <- c(
"apples and oranges and pears and bananas",
"pineapples and mangos and guavas"
)

str_split(fruits, " and ")

## [[1]]
## [1] "apples"  "oranges" "pears"   "bananas"
## 
## [[2]]
## [1] "pineapples" "mangos"     "guavas"

str_split(fruits, " and ", simplify = TRUE)

##      [,1]         [,2]      [,3]     [,4]     
## [1,] "apples"     "oranges" "pears"  "bananas"
## [2,] "pineapples" "mangos"  "guavas" ""

# Specify n to restrict the number of possible matches
str_split(fruits, " and ", n = 3)

## [[1]]
## [1] "apples"            "oranges"           "pears and bananas"
## 
## [[2]]
## [1] "pineapples" "mangos"     "guavas"

str_split(fruits, " and ", n = 2)

## [[1]]
## [1] "apples"                        "oranges and pears and bananas"
## 
## [[2]]
## [1] "pineapples"        "mangos and guavas"

# If n greater than number of pieces, no padding occurs
str_split(fruits, " and ", n = 5)

## [[1]]
## [1] "apples"  "oranges" "pears"   "bananas"
## 
## [[2]]
## [1] "pineapples" "mangos"     "guavas"

# Use fixed to return a character matrix
str_split_fixed(fruits, " and ", 3)

##      [,1]         [,2]      [,3]               
## [1,] "apples"     "oranges" "pears and bananas"
## [2,] "pineapples" "mangos"  "guavas"

str_split_fixed(fruits, " and ", 4)

##      [,1]         [,2]      [,3]     [,4]     
## [1,] "apples"     "oranges" "pears"  "bananas"
## [2,] "pineapples" "mangos"  "guavas" ""

案例2

x <- c("你，我他")
str_split(x,"，",n=2)#注意：此处逗号为中文

## [[1]]
## [1] "你"   "我他"

x <- c("你和我和他")
str_split_fixed(x,"和",n=3)#此处一定要有n=3，否则报错

##      [,1] [,2] [,3]
## [1,] "你" "我" "他"

str_split(x,"和")

## [[1]]
## [1] "你" "我" "他"

17.str_starts & str_ends

Detect the presence or absence of a pattern at the beginning or end of a string. 检测字符串开头或结尾是否存在模式。

用法：
str_starts(string, pattern, negate = FALSE) str_ends(string, pattern, negate = FALSE)

negate: If TRUE, return non-matching elements.

案例1

fruit <- c("apple", "banana", "pear", "pinapple")

str_starts(fruit, "p")

## [1] FALSE FALSE  TRUE  TRUE

str_starts(fruit, "p", negate = TRUE)

## [1]  TRUE  TRUE FALSE FALSE

str_ends(fruit, "e")

## [1]  TRUE FALSE FALSE  TRUE

str_ends(fruit, "e", negate = TRUE)

## [1] FALSE  TRUE  TRUE FALSE

案例2

str_starts(c("我和你","认识"),"我")

## [1]  TRUE FALSE

18.str_sub

Extract and replace substrings from a character vector. 从字符向量中提取并替换子字符串。

用法：
str_sub(string, start = 1L, end = -1L) str_sub(string, start = 1L, end = -1L, omit_na = FALSE)<- value

omit_na: Single logical value. If TRUE, missing values in any of the arguments provided will result in an unchanged input.

案例1

hw <- "Hadley Wickham"

str_sub(hw, 3, 5)

## [1] "dle"

str_sub(hw, end = 8)

## [1] "Hadley W"

str_sub(hw, 6, 14)

## [1] "y Wickham"

str_sub(hw, 9)#从第九位到结束，注意，空格也是一位。

## [1] "ickham"

str_sub(hw, c(2, 6), c(8, 14))#开始分别是第二位和第六位，结束分别是第八和十四位。

## [1] "adley W"   "y Wickham"

# Negative indices
str_sub(hw, -2)#倒数两位

## [1] "am"

str_sub(hw, -8)#倒数8位

## [1] " Wickham"

str_sub(hw, end = -7)#从第一位到第倒数七位。

## [1] "Hadley W"

案例2

# output from str_locate_all
pos <- str_locate_all(hw, "[aeio]")[[1]]
pos

##      start end
## [1,]     2   2
## [2,]     5   5
## [3,]     9   9
## [4,]    13  13

str_sub(hw, pos)

## [1] "a" "e" "i" "a"

str_sub(hw, pos[, 1], pos[, 2])

## [1] "a" "e" "i" "a"

案例3

sent <- c("我来自中国")
str_sub(sent,1,3)

## [1] "我来自"

str_sub(sent,4)

## [1] "中国"

str_sub(sent,end = 4)

## [1] "我来自中"

str_sub(sent,start = -2)

## [1] "中国"

str_sub(sent, -2)

## [1] "中国"

str_sub(sent,end = -2)

## [1] "我来自中"

sent <- c("我","来","自","中","国")
str_sub(sent,end=2)#失败案例：str_sub 对向量的每个元素分别截取，而每个元素只有1个字符，所以结果不变

## [1] "我" "来" "自" "中" "国"

19.str_subset & str_which

Keep strings matching a pattern, or find positions.
保留与模式匹配的字符串，或找出它们的位置。

用法：
str_subset(string, pattern, negate = FALSE)
str_which(string, pattern, negate = FALSE)

案例1

fruit <- c("apple", "banana", "pear", "pinapple")
str_subset(fruit, "a")

## [1] "apple"    "banana"   "pear"     "pinapple"

str_which(fruit, "a")

## [1] 1 2 3 4

str_subset(fruit, "^a")

## [1] "apple"

str_subset(fruit, "a$")

## [1] "banana"

str_subset(fruit, "b")

## [1] "banana"

str_subset(fruit, "[aeiou]")

## [1] "apple"    "banana"   "pear"     "pinapple"

# Returns elements that do NOT match
str_subset(fruit, "^p", negate = TRUE)

## [1] "apple"  "banana"

# Missings never match
str_subset(c("a", NA, "b"), ".")

## [1] "a" "b"

str_which(c("a", NA, "b"), ".")

## [1] 1 3

案例2

sent <- c("我来自中国","我来自英国")
str_subset(sent,"我")

## [1] "我来自中国" "我来自英国"

str_subset(sent,"中")

## [1] "我来自中国"

20.str_trim & str_squish

Trim whitespace from a string。从字符串修剪空格。

用法：
str_trim(string, side = c("both", "left", "right"))
str_squish(string)

str_pad() to add whitespace.

案例1

str_trim(" String with trailing and leading white space\t")

## [1] "String with trailing and leading white space"

#\t 匹配一个制表符

str_trim(" String with trailing and leading white space")

## [1] "String with trailing and leading white space"

str_trim("\n\nString with trailing and leading white space\n\n")# \n 表示换行符，属于空白字符，因此也会被去除。

## [1] "String with trailing and leading white space"

str_trim("String with trailing and leading white space")

## [1] "String with trailing and leading white space"

str_squish(" String with trailing, middle, and leading white space\t")

## [1] "String with trailing, middle, and leading white space"

str_squish("\n\nString with excess, trailing and leading white space\n\n")

## [1] "String with excess, trailing and leading white space"

案例2

t <- " 我来 自中国 "
str_trim(t)

## [1] "我来 自中国"

str_squish(t)

## [1] "我来 自中国"

str_trim(t,side = c("both","left","right"))

## [1] "我来 自中国"

21.str_trunc

Truncate a character string.截断字符串。

用法：
str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...")

side, ellipsis: Location and content of ellipsis that indicates content has been removed.

str_pad() to increase the minimum width of a string.

案例1

x <- "This string is moderately long"
rbind(
str_trunc(x, 20, "right"),
str_trunc(x, 20, "left"),
str_trunc(x, 20, "center")
)

##      [,1]                  
## [1,] "This string is mo..."
## [2,] "...s moderately long"
## [3,] "This stri...ely long"

案例2

sent <- "我来自中国"
str_trunc(sent,3,"right")#省略号"..."本身占3个字符，所以width=3时只剩下省略号

## [1] "..."

str_trunc(sent,4,"right")

## [1] "我..."

str_trunc(sent,5,"right")

## [1] "我来自中国"

sent <- "It is a nice day!"
str_trunc(sent,3,"right")

## [1] "..."

str_trunc(sent,4,"right")

## [1] "I..."

str_trunc(sent,7,"right")

## [1] "It i..."

22.str_view

View HTML rendering of regular expression match. 查看正则表达式匹配的HTML呈现。

用法：
str_view(string, pattern, match = NA)
str_view_all(string, pattern, match = NA)

match: If TRUE, shows only strings that match the pattern. If FALSE, shows only the strings that don’t match the pattern. Otherwise (the default, NA) displays both matches and non-matches.

案例1

str_view(c("abc", "def", "fgh"), "[aeiou]")

str_view(c("abc", "def", "fgh"), "^")

str_view(c("abc", "def", "fgh"), "..")

# Show all matches with str_view_all
str_view_all(c("abc", "def", "fgh"), "d|e")

# Use match to control what is shown
str_view(c("abc", "def", "fgh"), "d|e")

str_view(c("abc", "def", "fgh"), "d|e", match = TRUE)

str_view(c("abc", "def", "fgh"), "d|e", match = FALSE)

案例2

shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk 2")
str_view(shopping_list, "\\b[a-z]{1,3}\\b")

str_view(shopping_list, "\\b[a-z]{1,2}\\b")

str_view_all(shopping_list, "\\b[a-z]{1,2}\\b")

23.str_wrap

Wrap strings into nicely formatted paragraphs. 将字符串包装成格式正确的段落。

用法：
str_wrap(string, width = 80, indent = 0, exdent = 0)

width: positive integer giving target line width in characters. A width less than or equal to 1 will put each word on its own line. 正整数，以字符为单位给出目标行宽。小于或等于1的宽度会将每个单词放在自己的行上。

indent: non-negative integer giving indentation of first line in each paragraph 非负整数，使每个段落的第一行缩进

exdent: non-negative integer giving indentation of following lines in each paragraph 非负整数，给出每个段落中首行之后各行的缩进量

案例1

thanks_path <- file.path(R.home("doc"), "THANKS")
thanks <- str_c(readLines(thanks_path), collapse = "\n")
thanks <- word(thanks, 1, 3, fixed("\n\n"))
cat(str_wrap(thanks), "\n")

## R would not be what it is today without the invaluable help of these people
## outside of the R core team, who contributed by donating code, bug fixes and
## documentation: Valerio Aimale, Suharto Anggono, Thomas Baier, Henrik Bengtsson,
## Roger Bivand, Ben Bolker, David Brahm, G"oran Brostr"om, Patrick Burns, Vince
## Carey, Saikat DebRoy, Matt Dowle, Brian D'Urso, Lyndon Drake, Dirk Eddelbuettel,
## Claus Ekstrom, Sebastian Fischmeister, John Fox, Paul Gilbert, Yu Gong, Gabor
## Grothendieck, Frank E Harrell Jr, Peter M. Haverty, Torsten Hothorn, Robert
## King, Kjetil Kjernsmo, Roger Koenker, Philippe Lambert, Jan de Leeuw, Jim
## Lindsey, Patrick Lindsey, Catherine Loader, Gordon Maclean, Arni Magnusson, John
## Maindonald, David Meyer, Ei-ji Nakama, Jens Oehlschaegel, Steve Oncley, Richard
## O'Keefe, Hubert Palme, Roger D. Peng, Jose' C. Pinheiro, Tony Plate, Anthony
## Rossini, Jonathan Rougier, Petr Savicky, Guenther Sawitzki, Marc Schwartz, Arun
## Srinivasan, Detlef Steuer, Bill Simpson, Gordon Smyth, Adrian Trapletti, Terry
## Therneau, Rolf Turner, Bill Venables, Gregory R. Warnes, Andreas Weingessel,
## Morten Welinder, James Wettenhall, Simon Wood, and Achim Zeileis. Others have
## written code that has been adopted by R and is acknowledged in the code files,
## including

cat(str_wrap(thanks, width = 40), "\n")

## R would not be what it is today
## without the invaluable help of these
## people outside of the R core team, who
## contributed by donating code, bug fixes
## and documentation: Valerio Aimale,
## Suharto Anggono, Thomas Baier, Henrik
## Bengtsson, Roger Bivand, Ben Bolker,
## David Brahm, G"oran Brostr"om, Patrick
## Burns, Vince Carey, Saikat DebRoy,
## Matt Dowle, Brian D'Urso, Lyndon Drake,
## Dirk Eddelbuettel, Claus Ekstrom,
## Sebastian Fischmeister, John Fox, Paul
## Gilbert, Yu Gong, Gabor Grothendieck,
## Frank E Harrell Jr, Peter M. Haverty,
## Torsten Hothorn, Robert King, Kjetil
## Kjernsmo, Roger Koenker, Philippe
## Lambert, Jan de Leeuw, Jim Lindsey,
## Patrick Lindsey, Catherine Loader,
## Gordon Maclean, Arni Magnusson, John
## Maindonald, David Meyer, Ei-ji Nakama,
## Jens Oehlschaegel, Steve Oncley, Richard
## O'Keefe, Hubert Palme, Roger D. Peng,
## Jose' C. Pinheiro, Tony Plate, Anthony
## Rossini, Jonathan Rougier, Petr Savicky,
## Guenther Sawitzki, Marc Schwartz, Arun
## Srinivasan, Detlef Steuer, Bill Simpson,
## Gordon Smyth, Adrian Trapletti, Terry
## Therneau, Rolf Turner, Bill Venables,
## Gregory R. Warnes, Andreas Weingessel,
## Morten Welinder, James Wettenhall, Simon
## Wood, and Achim Zeileis. Others have
## written code that has been adopted by R
## and is acknowledged in the code files,
## including

cat(str_wrap(thanks, width = 60, indent = 2), "\n")

##   R would not be what it is today without the invaluable help
## of these people outside of the R core team, who contributed
## by donating code, bug fixes and documentation: Valerio
## Aimale, Suharto Anggono, Thomas Baier, Henrik Bengtsson,
## Roger Bivand, Ben Bolker, David Brahm, G"oran Brostr"om,
## Patrick Burns, Vince Carey, Saikat DebRoy, Matt Dowle,
## Brian D'Urso, Lyndon Drake, Dirk Eddelbuettel, Claus
## Ekstrom, Sebastian Fischmeister, John Fox, Paul Gilbert,
## Yu Gong, Gabor Grothendieck, Frank E Harrell Jr, Peter M.
## Haverty, Torsten Hothorn, Robert King, Kjetil Kjernsmo,
## Roger Koenker, Philippe Lambert, Jan de Leeuw, Jim Lindsey,
## Patrick Lindsey, Catherine Loader, Gordon Maclean, Arni
## Magnusson, John Maindonald, David Meyer, Ei-ji Nakama,
## Jens Oehlschaegel, Steve Oncley, Richard O'Keefe, Hubert
## Palme, Roger D. Peng, Jose' C. Pinheiro, Tony Plate, Anthony
## Rossini, Jonathan Rougier, Petr Savicky, Guenther Sawitzki,
## Marc Schwartz, Arun Srinivasan, Detlef Steuer, Bill Simpson,
## Gordon Smyth, Adrian Trapletti, Terry Therneau, Rolf Turner,
## Bill Venables, Gregory R. Warnes, Andreas Weingessel, Morten
## Welinder, James Wettenhall, Simon Wood, and Achim Zeileis.
## Others have written code that has been adopted by R and is
## acknowledged in the code files, including

cat(str_wrap(thanks, width = 60, exdent = 2), "\n")

## R would not be what it is today without the invaluable help
##   of these people outside of the R core team, who contributed
##   by donating code, bug fixes and documentation: Valerio
##   Aimale, Suharto Anggono, Thomas Baier, Henrik Bengtsson,
##   Roger Bivand, Ben Bolker, David Brahm, G"oran Brostr"om,
##   Patrick Burns, Vince Carey, Saikat DebRoy, Matt Dowle,
##   Brian D'Urso, Lyndon Drake, Dirk Eddelbuettel, Claus
##   Ekstrom, Sebastian Fischmeister, John Fox, Paul Gilbert,
##   Yu Gong, Gabor Grothendieck, Frank E Harrell Jr, Peter M.
##   Haverty, Torsten Hothorn, Robert King, Kjetil Kjernsmo,
##   Roger Koenker, Philippe Lambert, Jan de Leeuw, Jim Lindsey,
##   Patrick Lindsey, Catherine Loader, Gordon Maclean, Arni
##   Magnusson, John Maindonald, David Meyer, Ei-ji Nakama,
##   Jens Oehlschaegel, Steve Oncley, Richard O'Keefe, Hubert
##   Palme, Roger D. Peng, Jose' C. Pinheiro, Tony Plate, Anthony
##   Rossini, Jonathan Rougier, Petr Savicky, Guenther Sawitzki,
##   Marc Schwartz, Arun Srinivasan, Detlef Steuer, Bill Simpson,
##   Gordon Smyth, Adrian Trapletti, Terry Therneau, Rolf Turner,
##   Bill Venables, Gregory R. Warnes, Andreas Weingessel, Morten
##   Welinder, James Wettenhall, Simon Wood, and Achim Zeileis.
##   Others have written code that has been adopted by R and is
##   acknowledged in the code files, including

cat(str_wrap(thanks, width = 0, exdent = 2), "\n")

## R
##   would
##   not
##   be
##   what
##   it
##   is
##   today
##   without
##   the
##   invaluable
##   help
##   of
##   these
##   people
##   outside
##   of
##   the
##   R
##   core
##   team,
##   who
##   contributed
##   by
##   donating
##   code,
##   bug
##   fixes
##   and
##   documentation:
##   Valerio
##   Aimale,
##   Suharto
##   Anggono,
##   Thomas
##   Baier,
##   Henrik
##   Bengtsson,
##   Roger
##   Bivand,
##   Ben
##   Bolker,
##   David
##   Brahm,
##   G"oran
##   Brostr"om,
##   Patrick
##   Burns,
##   Vince
##   Carey,
##   Saikat
##   DebRoy,
##   Matt
##   Dowle,
##   Brian
##   D'Urso,
##   Lyndon
##   Drake,
##   Dirk
##   Eddelbuettel,
##   Claus
##   Ekstrom,
##   Sebastian
##   Fischmeister,
##   John
##   Fox,
##   Paul
##   Gilbert,
##   Yu
##   Gong,
##   Gabor
##   Grothendieck,
##   Frank
##   E
##   Harrell
##   Jr,
##   Peter
##   M.
##   Haverty,
##   Torsten
##   Hothorn,
##   Robert
##   King,
##   Kjetil
##   Kjernsmo,
##   Roger
##   Koenker,
##   Philippe
##   Lambert,
##   Jan
##   de
##   Leeuw,
##   Jim
##   Lindsey,
##   Patrick
##   Lindsey,
##   Catherine
##   Loader,
##   Gordon
##   Maclean,
##   Arni
##   Magnusson,
##   John
##   Maindonald,
##   David
##   Meyer,
##   Ei-
##   ji
##   Nakama,
##   Jens
##   Oehlschaegel,
##   Steve
##   Oncley,
##   Richard
##   O'Keefe,
##   Hubert
##   Palme,
##   Roger
##   D.
##   Peng,
##   Jose'
##   C.
##   Pinheiro,
##   Tony
##   Plate,
##   Anthony
##   Rossini,
##   Jonathan
##   Rougier,
##   Petr
##   Savicky,
##   Guenther
##   Sawitzki,
##   Marc
##   Schwartz,
##   Arun
##   Srinivasan,
##   Detlef
##   Steuer,
##   Bill
##   Simpson,
##   Gordon
##   Smyth,
##   Adrian
##   Trapletti,
##   Terry
##   Therneau,
##   Rolf
##   Turner,
##   Bill
##   Venables,
##   Gregory
##   R.
##   Warnes,
##   Andreas
##   Weingessel,
##   Morten
##   Welinder,
##   James
##   Wettenhall,
##   Simon
##   Wood,
##   and
##   Achim
##   Zeileis.
##   Others
##   have
##   written
##   code
##   that
##   has
##   been
##   adopted
##   by
##   R
##   and
##   is
##   acknowledged
##   in
##   the
##   code
##   files,
##   including

24.word

Extract words from a sentence.

用法：
word(string, start = 1L, end = start, sep = fixed(" "))

sep: separator between words. Defaults to single space. 单词之间的分隔符。默认为单个空格。

案例1

sentences <- c("Jane saw a cat", "Jane sat down")
word(sentences, 1)

## [1] "Jane" "Jane"

word(sentences, 2)

## [1] "saw" "sat"

word(sentences, -1)

## [1] "cat"  "down"

word(sentences, 2, -1)

## [1] "saw a cat" "sat down"

案例2

# Also vectorised over start and end
word(sentences[1], 1:3, -1)

## [1] "Jane saw a cat" "saw a cat"      "a cat"

word(sentences[1], 1, 1:4)

## [1] "Jane"           "Jane saw"       "Jane saw a"     "Jane saw a cat"

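案例3（不懂）

说明：sep 指定用什么来分隔“单词”。fixed('..') 按字面上的两个连续句点切分，所以 str 被分成 "abc.def" 和 "123.4568.999" 两个“单词”；fixed('.') 按单个句点切分，所以第一个“单词”是 "abc"。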

# Can define words by other separators
str <- 'abc.def..123.4568.999'
word(str, 1, sep = fixed('..'))

## [1] "abc.def"

word(str, 2, sep = fixed('..'))

## [1] "123.4568.999"

word(str, 1, sep = fixed('.'))

## [1] "abc"

modifier functions

fixed

Compare literal bytes in the string. This is very fast, but not usually what you want for non-ASCII character sets. 按字面字节比较字符串。这种方式速度非常快，但对于非 ASCII 字符集通常并不适用。

用法：
fixed(pattern, ignore_case = FALSE)

ignore_case: Should case differences be ignored in the match? 匹配时是否忽略大小写差异？

coll

Compare strings respecting standard collation rules.
按照标准排序规则（collation）比较字符串。

用法：
coll(pattern, ignore_case = FALSE, locale = "en", ...)

locale: Locale to use for comparisons. See stringi::stri_locale_list() for all possible options. Defaults to “en” (English) to ensure that the default collation is consistent across platforms.

regex

The default. Uses ICU regular expressions.

用法：
regex(pattern, ignore_case = FALSE, multiline = FALSE, comments = FALSE, dotall = FALSE, ...)

multiline: If TRUE, ^ and $ match the beginning and end of each line. If FALSE, the default, they only match the start and end of the input.

comments: If TRUE, white space and comments beginning with # are ignored. Escape literal spaces with "\\ " (a backslash followed by a space).

dotall: If TRUE, . will also match line terminators.

boundary

Match boundaries between things.

用法：
boundary(type = c("character", "line_break", "sentence", "word"), skip_word_none = NA, ...)

character
Every character is a boundary.

line_break
Boundaries are places where it is acceptable to have a line break in the current locale.

sentence
The beginnings and ends of sentences are boundaries, using intelligent rules to avoid counting abbreviations (see the ICU boundary analysis documentation for details).

word
The beginnings and ends of words are boundaries.

skip_word_none: Ignore “words” that don’t contain any characters or numbers - i.e. punctuation. Default NA will skip such “words” only when splitting on word boundaries.

stringr

xxr

2020/9/10

1.str_c

案例1

案例2

案例3

总结

2.str_conv

案例

3.str_count

案例1

案例2

案例3

4.str_detect

案例1

案例2

案例3

5.str_dup

案例1

案例2

6.str_extract

案例1

案例2

案例3（不懂）

案例4

案例5

7.str_flatten

案例1

案例2

8.str_glue

案例1

案例2

9.str_length

案例1

案例2

10.str_locate

案例1

案例2

11.str_match

案例1（不懂）

案例2（不懂）

案例3

12.str_order

案例1

案例2

案例3

13.str_pad

案例2

14.str_remove

案例1

案例2

15.str_replace & str_replace_na

案例1

案例2

案例3（不懂）

案例4

案例5

案例6

16.str_split

案例1

案例2

17.str_starts & str_ends

案例1

案例2

18.str_sub

案例1

案例2

案例3

19.str_subset & str_which

案例1

案例2

20.str_trim & str_squish

案例1

案例2

21.str_trunc

案例1

案例2

22.str_view

案例1