library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
# Load FiveThirtyEight's list of college majors directly from GitHub.
# `show_col_types = FALSE` silences readr's column-specification message.
College_Majors <- read_csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  show_col_types = FALSE
)

# Inspect the structure: 174 rows and 3 columns, all parsed as character.
str(College_Majors)
## spec_tbl_df [174 x 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ FOD1P : chr [1:174] "1100" "1101" "1102" "1103" ...
## $ Major : chr [1:174] "GENERAL AGRICULTURE" "AGRICULTURE PRODUCTION AND MANAGEMENT" "AGRICULTURAL ECONOMICS" "ANIMAL SCIENCES" ...
## $ Major_Category: chr [1:174] "Agriculture & Natural Resources" "Agriculture & Natural Resources" "Agriculture & Natural Resources" "Agriculture & Natural Resources" ...
## - attr(*, "spec")=
## .. cols(
## .. FOD1P = col_character(),
## .. Major = col_character(),
## .. Major_Category = col_character()
## .. )
## - attr(*, "problems")=<externalptr>

# Preview the first six rows.
head(College_Majors)
## # A tibble: 6 x 3
## FOD1P Major Major_Category
## <chr> <chr> <chr>
## 1 1100 GENERAL AGRICULTURE Agriculture & Natural Resources
## 2 1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3 1102 AGRICULTURAL ECONOMICS Agriculture & Natural Resources
## 4 1103 ANIMAL SCIENCES Agriculture & Natural Resources
## 5 1104 FOOD SCIENCE Agriculture & Natural Resources
## 6 1105 PLANT SCIENCE AND AGRONOMY Agriculture & Natural Resources
# Majors whose name contains "DATA": first the matching names, then the
# row positions of those matches within the Major column.
College_Majors %>%
  pull(Major) %>%
  str_subset("DATA")
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
College_Majors %>%
  pull(Major) %>%
  str_which("DATA")
## [1] 52

# Same two lookups for majors whose name contains "STATISTICS".
College_Majors %>%
  pull(Major) %>%
  str_subset("STATISTICS")
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
College_Majors %>%
  pull(Major) %>%
  str_which("STATISTICS")
## [1] 44 59
[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"
Into a format like this:
c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
# The fruit names from the printed vector, stored one element per name.
fruit <- list(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)

# Flatten the list into a plain character vector.
unlist(fruit)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"

# Printing the vector only reproduces the indexed console layout. To obtain
# the requested c("...", "...") form, wrap every name in double quotes, join
# the names with ", " and enclose the result in c( ).
fruit_code <- paste0(
  "c(",
  paste0('"', unlist(fruit), '"', collapse = ", "),
  ")"
)
writeLines(fruit_code)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
A. (.)\1\1
This example is written as a bare regular expression, so its backreferences have a single backslash; as an R string it would be written "(.)\\1\\1". It matches any character followed by the same character two more times. Example: ccc.
B. "(.)(.)\\2\\1"
This matches any two characters followed by the same two characters in reverse order: the second character is repeated first, then the first character. Example: caac.
C. (..)\1
This example is written as a bare regular expression with a single backslash; as an R string it would be written "(..)\\1". It matches any two characters immediately followed by the same two characters. Example: caca.
D. "(.).\\1.\\1"
This matches a character, then any character, then the first character again, then any character, and then the first character once more. Example: cacbc.
E. "(.)(.)(.).*\\3\\2\\1"
This matches any three characters, followed by zero or more characters of any kind, and then the same three characters in reverse order (third, second, first). Examples: abccba, abcdddcba.
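A. Start and end with the same character.
"^(.).*\\1$"
The ^ anchors the match to the start of the word, (.) captures the first character, .* allows any number of characters (including none) in between, and \\1$ requires the captured first character to appear again at the end of the word. Note that this pattern needs at least two characters, so a one-letter word such as "a" is not matched.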
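B. Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
"(..).*\\1"
The (..) captures any two characters, .* allows any number of characters (including none) in between, and \\1 requires the same pair to appear again. The pattern is not anchored with ^ and $, because the anchors would restrict it to words that both begin and end with the repeated pair.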
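C. Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)
"(.).*\\1.*\\1"
The (.) captures a character, each .* allows any number of characters (including none), and each \\1 requires the captured character to appear again, so the character occurs in at least three places. Without the *, "(.).\\1.\\1" would only match when exactly one character separates the repeats, as in "eleven".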