# Load the FiveThirtyEight college-majors list straight from GitHub.
# stringsAsFactors is spelled out as FALSE (not the reassignable alias F) so
# the text columns are guaranteed to come back as character vectors.
majors <- read.csv(
  url('https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv'),
  stringsAsFactors = FALSE
)

# Inspect the structure of the imported data frame.
str(majors)
## 'data.frame': 174 obs. of 3 variables:
## $ FOD1P : chr "1100" "1101" "1102" "1103" ...
## $ Major : chr "GENERAL AGRICULTURE" "AGRICULTURE PRODUCTION AND MANAGEMENT" "AGRICULTURAL ECONOMICS" "ANIMAL SCIENCES" ...
## $ Major_Category: chr "Agriculture & Natural Resources" "Agriculture & Natural Resources" "Agriculture & Natural Resources" "Agriculture & Natural Resources" ...

# Majors whose name contains "DATA". The match is case-sensitive; the sample
# rows shown by str() above are all upper case.
majors$Major[grepl('DATA', majors$Major)]
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"

# Majors whose name contains "STATISTICS".
majors$Major[grepl('STATISTICS', majors$Major)]
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"
Into a format like this:
c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
# Raw console printout of a character vector, pasted verbatim: every item is
# wrapped in double quotes and each row begins with an [index] marker.
blob <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
library(stringr)

# Pull out each quoted item, then strip the surrounding quotes. Anchoring on
# the quote delimiters (instead of on runs of lower-case letters) keeps an
# item intact however many words it has and whatever characters it contains.
# `foods` remains a length-1 list holding one character vector.
foods <- lapply(
  str_extract_all(blob, '"[^"]+"'),
  str_remove_all,
  pattern = '"'
)

# Flatten the list into a plain character vector.
unlist(foods)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
(.)\1\1
"(.)(.)\\2\\1"
(..)\1
"(.).\\1.\\1"
"(.)(.)(.).*\\3\\2\\1"
Start and end with the same character.
Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)