knitr::opts_chunk$set(echo = TRUE)
library (readr)
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Raw GitHub location of the college-majors list (CSV).
urlfile <- "https://raw.githubusercontent.com/Nhodgkinson/data/master/college-majors/majors-list.csv"

# Open a connection to that URL and parse the CSV it serves.
mydata <- urlfile %>%
  url() %>%
  read_csv()
## Rows: 174 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): FOD1P, Major, Major_Category
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Exercise 1

# Logical mask: TRUE for every major whose name contains DATA or STATISTICS.
maj <- mydata$Major %>% str_detect(pattern = "DATA|STATISTICS")

# Show only the Major column for the flagged rows.
mydata[maj, "Major"]
## # A tibble: 3 × 1
##   Major                                        
##   <chr>                                        
## 1 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS
## 2 COMPUTER PROGRAMMING AND DATA PROCESSING     
## 3 STATISTICS AND DECISION SCIENCE

Exercise 2

“bell pepper” “bilberry” “blackberry” “blood orange”

“blueberry” “cantaloupe” “chili pepper” “cloudberry”

“elderberry” “lime” “lychee” “mulberry”

“olive” “salal berry”

# Collapse the fruit vector into a single comma-separated string of quoted
# names. The separator "', '" only supplies the quotes *between* items, so the
# opening quote of the first name and the closing quote of the last name are
# added explicitly; without them the result starts with apple' and ends with
# 'watermelon.
str_c("'", str_c(fruit, collapse = "', '"), "'")
## [1] "'apple', 'apricot', 'avocado', 'banana', 'bell pepper', 'bilberry', 'blackberry', 'blackcurrant', 'blood orange', 'blueberry', 'boysenberry', 'breadfruit', 'canary melon', 'cantaloupe', 'cherimoya', 'cherry', 'chili pepper', 'clementine', 'cloudberry', 'coconut', 'cranberry', 'cucumber', 'currant', 'damson', 'date', 'dragonfruit', 'durian', 'eggplant', 'elderberry', 'feijoa', 'fig', 'goji berry', 'gooseberry', 'grape', 'grapefruit', 'guava', 'honeydew', 'huckleberry', 'jackfruit', 'jambul', 'jujube', 'kiwi fruit', 'kumquat', 'lemon', 'lime', 'loquat', 'lychee', 'mandarine', 'mango', 'mulberry', 'nectarine', 'nut', 'olive', 'orange', 'pamelo', 'papaya', 'passionfruit', 'peach', 'pear', 'persimmon', 'physalis', 'pineapple', 'plum', 'pomegranate', 'pomelo', 'purple mangosteen', 'quince', 'raisin', 'rambutan', 'raspberry', 'redcurrant', 'rock melon', 'salal berry', 'satsuma', 'star fruit', 'strawberry', 'tamarillo', 'tangerine', 'ugli fruit', 'watermelon'"

Exercise 3

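(.)\1\1 : Matches the same character three times in a row (e.g. "aaa")
"(.)(.)\\2\\1" : Matches two characters followed by the same two characters in reverse order (e.g. "abba")
(..)\1 : Matches any two characters repeated immediately (e.g. "abab")
"(.).\\1.\\1" : Matches a character that appears three times, with any single character between each occurrence (e.g. "abaca")
"(.)(.)(.).*\\3\\2\\1" : Matches three characters, followed by any number of characters, followed by the original three characters in reverse order (e.g. "abc...cba")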

Exercise 4

# Words that start and end with the same character.
# The first character is captured; the rest of the word must then either end
# with that captured character, or be empty (a one-character word).
fruit %>% str_subset(pattern = "^(.)((.*\\1$)|\\1?$)")
## character(0)
# Words in which a pair of adjacent letters shows up again later in the word
# (e.g. "church" contains "ch" twice).
fruit %>% str_subset(pattern = "([A-Za-z][A-Za-z]).*\\1")
##  [1] "banana"       "bell pepper"  "chili pepper" "coconut"      "cucumber"    
##  [6] "elderberry"   "jujube"       "nectarine"    "papaya"       "salal berry"
# Words in which one lowercase letter occurs in at least three places
# (e.g. "eleven" contains three "e"s).
fruit %>% str_subset(pattern = "([a-z]).*\\1.*\\1")
##  [1] "banana"            "bell pepper"       "blood orange"     
##  [4] "chili pepper"      "clementine"        "cranberry"        
##  [7] "elderberry"        "kiwi fruit"        "papaya"           
## [10] "pineapple"         "purple mangosteen" "raspberry"        
## [13] "redcurrant"        "strawberry"