This assignment deals with string manipulation. Below are four exercises.
library(DT)
library(stringr)
# Fetch the FiveThirtyEight college-majors list (requires network access).
majors <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/2d2ff3e9457549d51f8e571c52099bfe9b2017ad/college-majors/majors-list.csv"
)
# Keep only the majors whose name contains "DATA" or "STATISTICS".
filtered <- str_subset(string = majors$Major, pattern = "DATA|STATISTICS")
# Auto-print the matching majors.
filtered
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Into a format like this:
c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)
# Raw console printout of the fruit vector, kept as one multi-line string.
str <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
# Extract every double-quoted item, quotes included. Anchoring on the quotes
# (rather than on runs of letters) keeps names of any length or word count
# intact and never picks up the [n] index markers.
str2 <- str_extract_all(str, pattern = '"[^"]+"')
# str_extract_all() returns a list holding one character vector per input
# string, so take the first element explicitly and build the c(...) wrapper
# ourselves instead of relying on str_c() to coerce the list into that text.
vect <- str_c("c(", str_c(str2[[1]], collapse = ", "), ")")
writeLines(vect)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
Matches any character that appears three times in a row (e.g. "www" in "Awww").
# One captured character followed by two back-references to it.
sample <- c(
  "Awww",
  "Hmmm",
  "lol"
)
str_view(string = sample, pattern = "(.)\\1\\1")
Matches a pair of characters followed by the same pair in reversed order (e.g. "noon").
# Two captured characters, then the second and the first again (an "abba" shape).
sample <- c(
  "noon",
  "teeth"
)
str_view(string = sample, pattern = "(.)(.)\\2\\1")
Matches a pair of characters that is immediately followed by the same pair (e.g. "dada").
# A captured two-character group immediately repeated via a back-reference.
sample <- c(
  "dada",
  "haha"
)
str_view(string = sample, pattern = "(..)\\1")
iv) "(.).\\1.\\1"
Matches a character, followed by any character, then the first character again, then any character, and finally the first character once more (e.g. "abaha").
# The captured character must reappear at offsets 3 and 5 of the match,
# with arbitrary characters in between.
sample <- c(
  "abaha"
)
str_view(string = sample, pattern = "(.).\\1.\\1")
Matches three characters, followed by zero or more characters of any kind, followed by the same three characters in reverse order (e.g. "momismom").
# Three captured characters, any filler, then the captures replayed backwards.
sample <- c(
  "momismom"
)
str_view(string = sample, pattern = "(.)(.)(.).*\\3\\2\\1")
i) Start and end with the same character.
# Anchor the pattern with ^ and $ so the captured first character must also be
# the last character of the whole string. Without anchors, "(.).*\\1" matches
# any string that merely contains a repeated character (e.g. "banana").
# The optional group lets a single-character string count as a match too.
str_view(c("dad", "settings", "day"), "^(.)(.*\\1)?$")
# ii) Contain a repeated pair of characters (e.g. "church" contains "ch" twice).
str_view(
  string = c("church", "joggingg"),
  pattern = "(..).*\\1"
)
iii) Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s).
# The captured character must occur twice more later in the string,
# with anything (or nothing) between the occurrences.
str_view(
  string = c("deleven", "five"),
  pattern = "(.).*\\1.*\\1"
)