# Raw CSV of college majors in the fivethirtyeight/data GitHub repository.
major_list_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"

# Download the file and parse it into a data frame.
majors <- read.csv(file = major_list_url)
Majors whose names contain "DATA" or "STATISTICS":
## FOD1P Major Major_Category
## 44 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
## 52 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 59 3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics
# Console printout of a character vector, kept as one raw string: the "[n]"
# index markers, the double quotes and the line breaks are all part of the
# text.
sampletext <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'

# Show the raw string, escapes included.
sampletext
## [1] "[1] \"bell pepper\" \"bilberry\" \"blackberry\" \"blood orange\"\n[5] \"blueberry\" \"cantaloupe\" \"chili pepper\" \"cloudberry\" \n[9] \"elderberry\" \"lime\" \"lychee\" \"mulberry\" \n[13] \"olive\" \"salal berry\""
# Pull every double-quoted item out of the captured console text. The pattern
# is: opening quote, a run of lowercase letters and spaces, closing quote.
# That keeps names of any length or word count, where the previous
# "[a-z]+.[a-z]+" needed at least three characters and allowed only a single
# (arbitrary) separator. `stringr::` is spelled out because the visible
# library("stringr") call only appears further down the script.
sampletextonlytext <- unlist(
  stringr::str_extract_all(sampletext, pattern = "\"[a-z ]+\"")
)

# Strip the surrounding quotes, leaving the bare item names.
sampletext2 <- stringr::str_replace_all(sampletextonlytext, "\"", "")

# sampletext2 is already a character vector; write it out as R source code.
dput(sampletext2)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
## "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
## "lychee", "mulberry", "olive", "salal berry")
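(.)\1\1
Typed into R as the string "(.)\1\1", this does not work, because the backslashes are not escaped (R reads "\1" as a control character rather than a backreference). Written as "(.)\\1\\1", it matches any character repeated three times in a row, such as aaa or bbb.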
"(.)(.)\\2\\1"
Matches two characters followed by the same two characters in reverse order, such as abba or 1221.
(..)\1
Typed into R as the string "(..)\1", this does not work, because the backslash is not escaped. Written as "(..)\\1", it matches any pair of characters that is immediately repeated, such as abab or 1212.
"(.).\\1.\\1"
Matches a character that occurs three times with exactly one arbitrary character between each occurrence, such as abaca or 11111.
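"(.)(.)(.).*\\3\\2\\1"
Matches three characters, followed by zero or more arbitrary characters, followed by the same three characters in reverse order, such as abcxcba. It is the three-character analogue of "(.)(.)\\2\\1".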
library("stringr")

# Backreference patterns under test. Inside an R string literal every regex
# backslash has to be doubled: a bare "\1" is parsed by R as the control
# character U+0001, not as a backreference, and such a pattern matches none
# of the test strings.

# One character three times in a row.
expression1 <- "(.)\\1\\1"
# Two characters followed by the same two in reverse order.
expression2 <- "(.)(.)\\2\\1"
# A two-character pair that is immediately repeated.
expression3 <- "(..)\\1"
# The same character at positions 1, 3 and 5 of a five-character window.
expression4 <- "(.).\\1.\\1"
# Three characters, anything in between, then the same three reversed.
expression5 <- "(.)(.)(.).*\\3\\2\\1"

# Test strings chosen to hit and miss each pattern.
dataforq3 <- c(
  "111", "aaaa", "aaa", "1212", "1221", "a1a1", "a111a", "1abba1", "2211aa",
  "11111a111", "aaa11aaaaa1", "12a", "abba"
)

# For each pattern, keep only the test strings that contain a match.
result1 <- str_subset(dataforq3, expression1)
result1
## [1] "111" "aaaa" "aaa" "a111a" "11111a111"
## [6] "aaa11aaaaa1"
result2 <- str_subset(dataforq3, expression2)
result2
## [1] "aaaa" "1221" "1abba1" "11111a111" "aaa11aaaaa1"
## [6] "abba"
result3 <- str_subset(dataforq3, expression3)
result3
## [1] "aaaa" "1212" "a1a1" "11111a111" "aaa11aaaaa1"
result4 <- str_subset(dataforq3, expression4)
result4
## [1] "11111a111" "aaa11aaaaa1"
result5 <- str_subset(dataforq3, expression5)
result5
## [1] "1abba1" "11111a111" "aaa11aaaaa1"
a. Start and end with the same character. b. Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice). c. Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s).
# Sample words for the three patterns below; the two digit strings are
# negative cases.
dataforq4 <- c(
  "1234", "5678", "church", "mom", "dad", "eleven", "steventeen"
)

# (a) Starts and ends with the same character. The tail group is optional so
#     that a one-character string, which trivially satisfies the condition,
#     also matches.
expressiona <- "^(.)(.*\\1)?$"
# (b) Contains a pair of letters that occurs again later in the string.
#     Both cases are accepted; the backreference itself stays case-sensitive.
expressionb <- "([A-Za-z][A-Za-z]).*\\1"
# (c) Contains one letter in at least three places.
expressionc <- "([A-Za-z]).*\\1.*\\1"

# `stringr::` is spelled out so these calls do not depend on an earlier
# library() call. Each call keeps only the words that contain a match.
resulta <- stringr::str_subset(dataforq4, expressiona)
resulta
## [1] "mom" "dad"
resultb <- stringr::str_subset(dataforq4, expressionb)
resultb
## [1] "church" "steventeen"
resultc <- stringr::str_subset(dataforq4, expressionc)
resultc
## [1] "eleven" "steventeen"