1. Provide code that identifies the majors that contain either “DATA” or “STATISTICS”

library(tidyverse)
library(fivethirtyeight)

# Bring the college_all_ages data set into the workspace and alias it.
data("college_all_ages")
df <- college_all_ages

# Case-insensitive search so "Data"/"DATA" and "Statistics"/"STATISTICS"
# are all picked up; the matching majors are printed.
major_pattern <- regex("DATA|STATISTICS", ignore_case = TRUE)
df$major %>%
  str_subset(major_pattern)
## [1] "Computer Programming And Data Processing"     
## [2] "Statistics And Decision Science"              
## [3] "Management Information Systems And Statistics"

2. Write code that transforms the data below:

[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”

Into a format like this:

c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)

# Console printout of a character vector, held verbatim in one string.
berries <- '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"
[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  
[9] "elderberry"   "lime"         "lychee"       "mulberry"    
[13] "olive"        "salal berry"'

# Grab every double-quoted token, quotes included. The "[n]" index markers
# and the padding between tokens are never matched, so no squishing or
# minimum-length trick is needed, and names containing hyphens, digits or a
# single letter are kept as well.
quoted_names <- str_extract_all(berries, '"[^"]*"')[[1]]

# Build the c(...) text explicitly instead of depending on how a list
# happens to be coerced to character.
u <- str_c("c(", str_c(quoted_names, collapse = ", "), ")")
writeLines(u)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

3. Describe, in words, what these expressions will match:

(.)\1\1

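As a bare regular expression, this matches any single character repeated three times in a row (e.g. “aaa”). Typed as-is in R, however, it will match nothing: the pattern has to be a quoted string, and each backslash has to be doubled (“(.)\\1\\1”), because a single backslash is consumed as a string escape before it ever reaches the regex engine.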

“(.)(.)\\2\\1”

Will match the first case where two single characters appear in order, and then appear immediately again in reverse order.

# Example: in "abba" the pair "ab" is followed at once by its mirror "ba".
s <- "abba"
str_view(string = s, pattern = "(.)(.)\\2\\1")

(..)\1

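As a bare regular expression, this matches any two characters that are immediately repeated (e.g. “abab”). Typed as-is in R, however, it will match nothing: the pattern has to be a quoted string, and the backslash has to be doubled (“(..)\\1”), because a single backslash is consumed as a string escape before it ever reaches the regex engine.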

“(.).\\1.\\1”

Will match a character that appears three times, with exactly one arbitrary character (anything except a newline) between each occurrence, e.g. “abaca”.

# Example: "abacab" contains "abaca" -- "a" three times, one character apart.
s <- "abacab"
str_view(string = s, pattern = "(.).\\1.\\1")

"(.)(.)(.).*\\3\\2\\1"

Will match a sequence of three characters that later appears again in reverse order, with any number of characters (zero or more, excluding newlines) in between.

# Example: "abc" ... "cba", here separated by a single "Z".
s <- "abcZcba"
str_view(string = s, pattern = "(.)(.)(.).*\\3\\2\\1")

4. Construct regular expressions to match words that:

  1. Start and end with the same character.
s <- c("goat", "llama", "alpaca")
# ^(.) captures the first character. The tail "(?:.*\\1)" is optional so a
# one-character word, which trivially starts and ends with the same
# character, also matches; it is non-capturing so the result keeps its two
# columns (whole match, first character).
str_match(s, "^(.)(?:.*\\1)?$")
##      [,1]     [,2]
## [1,] NA       NA  
## [2,] NA       NA  
## [3,] "alpaca" "a"
  2. Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
s <- c("hall", "lurch", "church")
# The captured pair is restricted to letters, as the task asks, so repeated
# digits, spaces or punctuation do not count. The surrounding ^.* and .*$
# make the whole word the reported match.
str_match(s, "^.*([[:alpha:]]{2}).*\\1.*$")
##      [,1]     [,2]
## [1,] NA       NA  
## [2,] NA       NA  
## [3,] "church" "ch"
  3. Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)
s <- c("ten", "eleven", "alpaca")
# The captured character is restricted to a letter, as the task asks; it
# must then recur twice more anywhere later in the word. The surrounding
# ^.* and .*$ make the whole word the reported match.
str_match(s, "^.*([[:alpha:]]).*\\1.*\\1.*$")
##      [,1]     [,2]
## [1,] NA       NA  
## [2,] "eleven" "e" 
## [3,] "alpaca" "a"