library(readr)
library(stringr)
The majors whose names contain ‘Data’ or ‘Statistics’ are Computer Programming and Data Processing, Management Information Systems and Statistics, and Statistics and Decision Science.
# Read the FiveThirtyEight college-majors lookup table directly from GitHub.
majors_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
majors_list <- read_csv(url(majors_url))
## Rows: 174 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): FOD1P, Major, Major_Category
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check the three character columns.
head(majors_list)
## # A tibble: 6 × 3
## FOD1P Major Major_Category
## <chr> <chr> <chr>
## 1 1100 GENERAL AGRICULTURE Agriculture & Natural Resources
## 2 1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3 1102 AGRICULTURAL ECONOMICS Agriculture & Natural Resources
## 4 1103 ANIMAL SCIENCES Agriculture & Natural Resources
## 5 1104 FOOD SCIENCE Agriculture & Natural Resources
## 6 1105 PLANT SCIENCE AND AGRONOMY Agriculture & Natural Resources
# Case-insensitive search of the major names for "data".
major_names <- majors_list$Major
major_names[grepl("DATA", major_names, ignore.case = TRUE)]
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
# Same case-insensitive search for "statistics".
major_names[grepl("Statistics", major_names, ignore.case = TRUE)]
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
Starting with one messy string, we are going to separate its values into a character vector.
# Raw input: a single string that looks like a printed character vector,
# complete with [n] index markers, double-quoted values and line breaks.
Original <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'

# Target: the same values as a proper character vector.
Desired <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)

# Every value sits between a pair of double quotes, so capture the quoted text
# rather than guessing at runs of letters. This keeps working for values that
# contain hyphens, digits or more than two words, and it leaves `Original`
# untouched instead of overwriting the input with the match list.
# str_match_all() returns one matrix per input string; column 1 is the full
# match (with quotes) and column 2 is the capture group (without quotes).
quoted_values <- str_match_all(Original, '"([^"]+)"')[[1]]
Results <- quoted_values[, 2]
print(Results)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
print(Desired)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# Confirm the extracted vector matches the target exactly.
identical(Results, Desired)
## [1] TRUE
# Words containing the same character three times in a row (e.g. "aaa").
# The backreference has to be written "\\1" inside an R string: a bare "\1" is
# an octal escape for the control character U+0001, so the unescaped pattern
# never tested for a repeated character at all.
# The result is still empty with the corrected pattern: none of the words with
# a letter occurring three or more times has those occurrences adjacent.
str_subset(words, "(.)\\1\\1")
## character(0)
# Words containing a pair of characters immediately followed by the same pair
# in reverse order (the "abba" shape, e.g. the "ette" in "letter").
str_subset(string = words, pattern = "(.)(.)\\2\\1")
## [1] "afternoon" "apparent" "arrange" "bottom" "brilliant"
## [6] "common" "difficult" "effect" "follow" "indeed"
## [11] "letter" "million" "opportunity" "oppose" "tomorrow"
# Words containing any two characters immediately repeated (e.g. the "emem"
# in "remember").
# The backreference has to be written "\\1" inside an R string: a bare "\1" is
# an octal escape for the control character U+0001, which is why the unescaped
# pattern returned character(0).
str_subset(words, "(..)\\1")
## [1] "remember"
# Words in which one character occupies three alternating positions, with any
# single character in between each occurrence (e.g. the "e_e_e" in "eleven").
words[str_detect(words, "(.).\\1.\\1")]
## [1] "eleven"
# Words containing three characters, then any run of characters (possibly
# empty), then the same three characters in reverse order
# (e.g. "par" ... "rap" in "paragraph").
str_subset(string = words, pattern = "(.)(.)(.).*\\3\\2\\1")
## [1] "paragraph"
# Words that start and end with the same character. The first alternative
# handles words of two or more characters; the second (`\\1?$`) also admits
# one-character words such as "a".
words[str_detect(words, "^(.)((.*\\1$)|\\1?$)")]
## [1] "a" "america" "area" "dad" "dead"
## [6] "depend" "educate" "else" "encourage" "engine"
## [11] "europe" "evidence" "example" "excuse" "exercise"
## [16] "expense" "experience" "eye" "health" "high"
## [21] "knock" "level" "local" "nation" "non"
## [26] "rather" "refer" "remember" "serious" "stairs"
## [31] "test" "tonight" "transport" "treat" "trust"
## [36] "window" "yesterday"
# Words in which some two-letter sequence occurs at least twice, with anything
# (or nothing) in between -- e.g. the "ch" at both ends of "church".
repeated_pair <- "([A-Za-z][A-Za-z]).*\\1"
# Sanity check on the known example first.
str_subset("church", repeated_pair)
## [1] "church"
# Then apply the same pattern to the full word list.
str_subset(words, repeated_pair)
## [1] "appropriate" "church" "condition" "decide" "environment"
## [6] "london" "paragraph" "particular" "photograph" "prepare"
## [11] "pressure" "remember" "represent" "require" "sense"
## [16] "therefore" "understand" "whether"
# Words in which a single lowercase letter appears in at least three places,
# not necessarily adjacent -- e.g. the three e's in "eleven".
tripled_letter <- "([a-z]).*\\1.*\\1"
# Sanity check on the known example first.
str_subset("eleven", tripled_letter)
## [1] "eleven"
# Then apply the same pattern to the full word list.
str_subset(words, tripled_letter)
## [1] "appropriate" "available" "believe" "between" "business"
## [6] "degree" "difference" "discuss" "eleven" "environment"
## [11] "evidence" "exercise" "expense" "experience" "individual"
## [16] "paragraph" "receive" "remember" "represent" "telephone"
## [21] "therefore" "tomorrow"