packages used

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

1)

# Load the FiveThirtyEight college-majors list straight from GitHub.
majors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  header = TRUE,
  sep = ","
)

# Return the entries of `arr` that mention "data" or "statistics".
#
# Matching is case-insensitive and is done on the whole vector at once
# instead of growing a result inside a loop.
#
# Args:
#   arr: vector of major names (character, or a factor, which is converted
#        to its labels).
#
# Returns:
#   Character vector of the matching entries, in their original order and
#   original case; `character(0)` when nothing matches. NA entries never
#   match.
data_or_stat <- function(arr) {
  arr <- as.character(arr)
  arr[grepl("data|statistics", arr, ignore.case = TRUE)]
}


# Pull out the Major column and run the hand-written filter on it.
major_list <- majors[["Major"]]
data_or_stat(major_list)

## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"     
## [3] "STATISTICS AND DECISION SCIENCE"

# Simpler built-in alternative: grep() with value = TRUE returns the matching
# entries directly, and ignore.case = TRUE makes the search case-insensitive.
grep("data|statistics", major_list, ignore.case = TRUE, value = TRUE)

## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"     
## [3] "STATISTICS AND DECISION SCIENCE"

2)

fruits <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)

# Build the text of an R `c(...)` call listing every fruit in single quotes.
# sprintf() quotes each element (and yields nothing for an empty vector) and
# paste(collapse = ) joins them, so no first-element special case is needed
# and no stray space is left before the closing parenthesis.
string <- paste0("c(", paste(sprintf("'%s'", fruits), collapse = ", "), ")")
string

## [1] "c('bell pepper', 'bilberry', 'blackberry', 'blood orange', 'blueberry', 'cantaloupe', 'chili pepper', 'cloudberry', 'elderberry', 'lime', 'lychee', 'mulberry', 'olive', 'salal berry')"

3) Define Regex Expressions:

Note: inside an R string literal the backslash must itself be escaped, so the backreference to the first capture group, written \1 in regex syntax, is typed as \\1 in R code.

1: (.)\1\1

Matches any string containing the same character three times in a row (e.g. aaa); `.` matches any character except a line break.

# Check each sample string for one character repeated three times in a row.
strings <- c("abc", "aaa", "bbb", "xyz")
pattern <- "(.)\\1\\1"
str_detect(strings, pattern)

## [1] FALSE  TRUE  TRUE FALSE

2: “(.)(.)\2\1”

Matches any string containing two characters followed immediately by the same two characters in reverse order (e.g. ahha).

# Check each sample string for a two-character pair followed by its mirror.
strings <- c("abcd", "aaaa", "ahha", "haha")
pattern <- "(.)(.)\\2\\1"
str_detect(strings, pattern)

## [1] FALSE  TRUE  TRUE FALSE

3: (..)\1

Matches any string containing a pair of characters immediately followed by the same pair (e.g. ezez).

# Check each sample string for a two-character pair that is repeated at once.
strings <- c("ezez", "aaaa", "haha", "aboab")
pattern <- "(..)\\1"
str_detect(strings, pattern)

## [1]  TRUE  TRUE  TRUE FALSE

4: “(.).\1.\1”

Matches any string in which the same character occurs three times with exactly one arbitrary character between consecutive occurrences (e.g. abacad).

# Check each sample string for a character that recurs at every other position
# three times.
strings <- c("abacad", "aabbcc", "three", "aaa")
pattern <- "(.).\\1.\\1"
str_detect(strings, pattern)

## [1]  TRUE FALSE FALSE FALSE

5: "(.)(.)(.).*\3\2\1"

Matches any string containing three characters followed, after zero or more other characters, by the same three characters in reverse order (e.g. abcdcba).

# Check each sample string for three characters that reappear later in
# reverse order.
strings <- c("cccccc", "abcdcba", "abcdabc", "ahaha")
pattern <- "(.)(.)(.).*\\3\\2\\1"
str_detect(strings, pattern)

## [1]  TRUE  TRUE FALSE FALSE

4) Create Regex Expressions:

1) Start and end with the same character.

^([a-z])[a-z]*\1$

# Check which lowercase sample names begin and end with the same letter.
names <- c("richard", "hannah", "bob", "annie")
pattern <- "^([a-z])[a-z]*\\1$"
str_detect(names, pattern)

## [1] FALSE  TRUE  TRUE FALSE

2) Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)

[a-z]*([a-z]{2})[a-z]*\1[a-z]*

# Check which lowercase sample words contain some two-letter pair twice.
words <- c("church", "dog", "halal", "cat")
pattern <- "[a-z]*([a-z]{2})[a-z]*\\1[a-z]*"
str_detect(words, pattern)

## [1]  TRUE FALSE  TRUE FALSE

3) Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

[a-z]*([a-z])[a-z]*\1[a-z]*\1[a-z]*

# Check which lowercase sample words contain one letter at least three times.
numbers <- c("eleven", "seven", "three", "seventeen")
pattern <- "[a-z]*([a-z])[a-z]*\\1[a-z]*\\1[a-z]*"
str_detect(numbers, pattern)

## [1]  TRUE FALSE FALSE  TRUE

DATA607 HW3

Richard

2/16/2021

1)

2)