Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset (https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/), provide code that identifies the majors that contain either “DATA” or “STATISTICS”.
# Load the FiveThirtyEight majors list and display only the majors whose name
# contains "DATA" or "STATISTICS".
# read.csv() accepts a URL directly, so no separate download step is needed.
majorsUrl <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
collegeMajors <- read.csv(majorsUrl)
collegeMajors %>%
  # Refer to the column through dplyr's data mask (`Major`) rather than
  # `collegeMajors$Major`, so the test is always evaluated on the rows of the
  # data frame that is actually being piped in.
  filter(str_detect(Major, pattern = "DATA|STATISTICS")) %>%
  reactable()
Write code that transforms the data below:
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Into a format like this:
c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)
# Console printout of the fruit vector, pasted verbatim: it still contains the
# [n] index markers, the double quotes and the line breaks.
fruitsStringRaw <- "[1] \"bell pepper\" \"bilberry\" \"blackberry\" \"blood orange\"
[5] \"blueberry\" \"cantaloupe\" \"chili pepper\" \"cloudberry\"
[9] \"elderberry\" \"lime\" \"lychee\" \"mulberry\"
[13] \"olive\" \"salal berry\""
# Every fruit name is exactly one double-quoted token, so extract the quoted
# tokens and then drop the quotes. Unlike matching "[a-z]+ [a-z]*", this does
# not depend on the names being lowercase or at most two words long, and the
# unquoted [n] markers are skipped automatically.
fruitsStringRaw <- str_extract_all(fruitsStringRaw, pattern = "\"[^\"]+\"") %>%
  unlist() %>%
  str_remove_all(pattern = "\"")
# Print the resulting character vector (one element per fruit).
fruitsStringRaw
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
Describe, in words, what these expressions will match:
# Sample strings; only "aaa" and "111" repeat one character three times.
testWords <- c("aaa", "aba", "111", "212", "abcedfg")
# (.) captures any single character and each \\1 (regex \1) must repeat that
# same character, so this keeps strings with three identical characters in a row.
testWords %>% str_subset(pattern = "(.)\\1\\1")
## [1] "aaa" "111"
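With the backslashes escaped for R, as in the call above, the pattern (.)\\1\\1 matches any character that is repeated three times in a row (for example "aaa" or "111"). As literally written with single backslashes, however, R converts each \1 into the ASCII SOH control character before the regex engine sees it, so the expression matches any character, excluding newlines, followed by two SOH characters.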
# The first two sample strings end in two literal SOH bytes (written \1 in R).
testWords <- c("aaa\1\1", "aba\1\1", "111", "212", "abcedfg")
# With single backslashes R itself turns each \1 into the SOH control
# character, so the regex is "any character followed by two SOH characters"
# and no back-reference is involved.
testWords %>% str_subset(pattern = "(.)\1\1")
## [1] "aaa\001\001" "aba\001\001"
# Sample strings, some of which contain literal double-quote characters.
testWords <- c(
  "\"abba\"", "aaba", "1221", "\"1221\"", "abcedfg", "123\"abba\"abc"
)
# A literal double quote, two captured characters, the same two characters in
# reverse order (\\2 then \\1), and a closing double quote: a quoted
# four-character palindrome such as "abba", found anywhere inside the string.
testWords %>% str_subset(pattern = "\"(.)(.)\\2\\1\"")
## [1] "\"abba\"" "\"1221\"" "123\"abba\"abc"
# Sample strings; "abab", "1212" and "adedeb" each contain a doubled pair.
testWords <- c("abab", "aaba", "1212", "abcabc", "abcedfg", "adedeb")
# (..) captures any two characters and \\1 requires that same pair to follow
# immediately, e.g. the "dede" inside "adedeb".
testWords %>% str_subset(pattern = "(..)\\1")
## [1] "abab" "1212" "adedeb"
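With the backslash escaped for R, as in the call above, the pattern (..)\\1 matches any pair of characters that is immediately repeated (for example "abab" or the "dede" in "adedeb"). As literally written with a single backslash, however, R converts \1 into the ASCII SOH control character, so the expression matches any two characters, excluding newlines, followed by one SOH character.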
# The first three sample strings contain a literal SOH byte (written \1 in R).
testWords <- c("ab\1", "aa\1ba", "1212\1", "abcabc", "abcedfg", "adedeb")
# With a single backslash R turns \1 into the SOH control character, so the
# regex is "any two characters followed by one SOH character".
testWords %>% str_subset(pattern = "(..)\1")
## [1] "ab\001" "aa\001ba" "1212\001"
# Sample strings; the first two are wrapped in literal double quotes.
testWords <- c(
  "\"abaca\"", "\"12151\"", "1212", "abcabc", "abcedfg", "adedeb"
)
# Between two literal double quotes: a captured character, any character, the
# captured character again, any character, and the captured character a third
# time (five characters with the same one in positions 1, 3 and 5).
testWords %>% str_subset(pattern = "\"(.).\\1.\\1\"")
## [1] "\"abaca\"" "\"12151\""
# Sample strings; the first three are wrapped in literal double quotes.
testWords <- c(
  "\"123abcdejdhfj321\"",
  "\"123321\"",
  "\"abcanythingcba\"",
  "1212",
  "abcabc",
  "abcedfg",
  "adedeb"
)
# Between two literal double quotes: three captured characters, then any run of
# characters (possibly empty), then the same three characters in reverse order.
testWords %>% str_subset(pattern = "\"(.)(.)(.).*\\3\\2\\1\"")
## [1] "\"123abcdejdhfj321\"" "\"123321\"" "\"abcanythingcba\""
Start and end with the same character (a single-character string also counts): `^(.).*\\1$|^(.)$`
# Sample strings, including the single-character string "a".
testWords <- c(
  "aasdfjdskfjksdla",
  "b1238219839129b",
  "asdfkasdjfkas",
  "abcdef1234",
  "123451",
  "dfkdsafi21",
  "a"
)
# First alternative: the first character is captured and must also be the last
# one. Second alternative: the whole string is a single character.
testWords %>% str_subset(pattern = "^(.).*\\1$|^(.)$")
## [1] "aasdfjdskfjksdla" "b1238219839129b" "123451" "a"
# Sample strings; "12abcdef12" repeats a pair of digits, not letters.
testWords <- c(
  "church",
  "aasbdfsdbfaashadbfhaa",
  "12abcdef12",
  "213456",
  "123451",
  "dfkdsafi21",
  "a"
)
# Captures two consecutive letters and requires the same pair to appear again
# later in the string (e.g. "ch" ... "ch" in "church").
testWords %>% str_subset(pattern = "([:alpha:][:alpha:]).*\\1")
## [1] "church" "aasbdfsdbfaashadbfhaa"
# Sample strings; the all-digit ones contain no letters at all.
testWords <- c(
  "eleven", "seventeen", "123123123", "abcabcabc",
  "dfkdsafi21", "a", "eee", "111"
)
# Captures one letter and requires it to occur at least twice more later in
# the string, i.e. the same letter appears in at least three places.
testWords %>% str_subset(pattern = "([:alpha:]).*\\1.*\\1")
## [1] "eleven" "seventeen" "abcabcabc" "eee"