stringr library in R
library(stringr)
library(magrittr)
str_detect returns TRUE if a match for the pattern is
found in the string.
# Sample strings: plain text ending in a digit, special characters, an
# embedded newline, and digits only.
a <- c("My name is Neeraj 2", "With ? special ``", "With \n new Line", "1234")
# str_detect() examples: each call returns one logical value per element of a.
str_detect(a, "Neeraj")
## [1] TRUE FALSE FALSE FALSE
str_detect(a, "is")
## [1] TRUE FALSE FALSE FALSE
str_detect(a, "^M") # begins with M (put ^ before the pattern)
## [1] TRUE FALSE FALSE FALSE
str_detect(a, "e$") # ends with e (put $ after the pattern)
## [1] FALSE FALSE TRUE FALSE
str_detect(a, "[ea]") # contains e or a anywhere (a class acts like "or")
## [1] TRUE TRUE TRUE FALSE
str_detect(a, "[aeiou]") # contains any lowercase vowel
## [1] TRUE TRUE TRUE FALSE
str_detect(a, "[:digit:]") # contains a digit anywhere
## [1] TRUE FALSE FALSE TRUE
str_detect(a, "\\d") # same as above
## [1] TRUE FALSE FALSE TRUE
# \\D is TRUE when the string has at least one non-digit character; the string
# may still contain digits as well (see the first element).
str_detect(a, "\\D")
## [1] TRUE TRUE TRUE FALSE
str_detect(a, "[zq]", negate = TRUE) # TRUE for strings that do NOT match
## [1] TRUE TRUE TRUE TRUE
str_detect(a, "[je]$") # ends with j or e
## [1] FALSE FALSE TRUE FALSE
str_detect(a, "^[aeiou]") # begins with a lowercase vowel
## [1] FALSE FALSE FALSE FALSE
str_detect(a, "[:digit:]$") # ends with any digit
## [1] TRUE FALSE FALSE TRUE
str_detect(a, "\n") # detects a newline
## [1] FALSE FALSE TRUE FALSE
str_detect(a, "My[ n]") # My followed by a space or n
## [1] TRUE FALSE FALSE FALSE
str_detect(a, "^Neeraj$") # the whole string is exactly "Neeraj"
## [1] FALSE FALSE FALSE FALSE
filename <- c(
  "abc.csv", "stock.xlsx", "abc2.csv", "abc3.csv", "abc4.csv",
  "stock2.xlsx", "stock.csv", "stock2.csv"
)
# Only names that end with the ".csv" extension. The dot is escaped because an
# unescaped "." matches any character, so "datacsv" would also match.
str_view_all(filename, "\\.csv$")
# Only files that start with "abc" and end with ".csv".
str_view_all(filename, "^abc.*\\.csv$")
# In the pattern above, ^ anchors the start of the string, so the name must
# begin with a, immediately followed by b and then c; .* allows any characters
# before the final ".csv".
# Names that start with either a or s.
str_view_all(filename, "^[as]")
Alternatives to ^ and $ in patterns
are str_starts() and str_ends():
# str_starts() / str_ends() test the beginning / end of each string, so the
# pattern itself needs no ^ or $ anchor.
str_starts(string = a, pattern = "M")
## [1] TRUE FALSE FALSE FALSE
str_ends(string = a, pattern = "e")
## [1] FALSE FALSE TRUE FALSE
str_ends(string = a, pattern = "[:digit:]") # last character is a digit
## [1] TRUE FALSE FALSE TRUE
str_which returns the indices of the strings that match the
pattern. This is equivalent to str_detect(a, pattern) %>% which
str_which(a, "[:digit:]")
## [1] 1 4
# Logical form of the same test: TRUE at the positions str_which() reports.
str_detect(a, "[:digit:]")
## [1] TRUE FALSE FALSE TRUE
str_locate finds the first match and returns its start and end position
in the string
str_locate(a, "e") #output as matrix
## start end
## [1,] 7 7
## [2,] 10 10
## [3,] 9 9
## [4,] NA NA
str_locate_all(a, "e") #output as list
## [[1]]
## start end
## [1,] 7 7
## [2,] 13 13
## [3,] 14 14
##
## [[2]]
## start end
## [1,] 10 10
##
## [[3]]
## start end
## [1,] 9 9
## [2,] 15 15
##
## [[4]]
## start end
# Position of the first digit in each string (NA where there is none).
str_locate(a, "[:digit:]")
## start end
## [1,] 19 19
## [2,] NA NA
## [3,] NA NA
## [4,] 1 1
str_locate_all(a, "[aeiou]") #position of all vowels
## [[1]]
## start end
## [1,] 5 5
## [2,] 7 7
## [3,] 9 9
## [4,] 13 13
## [5,] 14 14
## [6,] 16 16
##
## [[2]]
## start end
## [1,] 2 2
## [2,] 10 10
## [3,] 12 12
## [4,] 13 13
##
## [[3]]
## start end
## [1,] 2 2
## [2,] 9 9
## [3,] 13 13
## [4,] 15 15
##
## [[4]]
## start end
# Positions of every "e" in each string (same call as shown earlier).
str_locate_all(a, "e")
## [[1]]
## start end
## [1,] 7 7
## [2,] 13 13
## [3,] 14 14
##
## [[2]]
## start end
## [1,] 10 10
##
## [[3]]
## start end
## [1,] 9 9
## [2,] 15 15
##
## [[4]]
## start end
str_count counts the number of matches in each string
str_count(a, "e") #only counts "e"
## [1] 3 1 2 0
str_count(string = a, pattern = "[:digit:]") # number of digits per string
## [1] 1 0 0 4
str_count(string = a, pattern = "^M") # 1 only when the string begins with M
## [1] 1 0 0 0
For the following set of words, do:
# Word list used by Problems 2.1-2.5.
a <- c(
  "Hello", "Hmm..", "Hello 123", "Neeraj", "My Pet 2", "12321", "Vowels",
  "Basic3", "Minister SJ 311", "123 Hello"
)
Problem 2.1: Count e in the words that contain
a digit.
# Keep only the words containing a digit, then count "e" in each of them.
a %>%
  str_subset("\\d") %>%
  str_count("e")
## [1] 1 1 0 0 1 1
Problem 2.2: Detect all words that do not contain vowels.
# negate = TRUE returns the words that do NOT match the vowel class. TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
str_subset(a, "[aeiou]", negate = TRUE)
## [1] "Hmm.." "12321"
Problem 2.3: Count vowels in each word.
# Number of lowercase vowels in each word.
str_count(a, "[aeiou]")
## [1] 2 0 2 3 1 0 2 2 3 2
Problem 2.4: Detect all words that contain both vowels and digits.
# A word qualifies when it has at least one lowercase vowel and at least one
# digit, in either order.
a[str_detect(a, "[aeiou]") & str_detect(a, "\\d")]
## [1] "Hello 123" "My Pet 2" "Basic3" "Minister SJ 311"
## [5] "123 Hello"
Problem 2.5: Detect words that start with M and end with any digit.
# ^M anchors an M at the start, .* spans whatever lies in between, and \\d$
# requires a digit as the last character.
str_subset(a, "^M.*\\d$")
## [1] "My Pet 2" "Minister SJ 311"