library(tidyverse)
library(readr)
  1. Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either “DATA” or “STATISTICS”

Loading the .csv file for College Majors data

# Read the majors list CSV and keep only the rows whose Major name contains
# "DATA" or "STATISTICS"; the filtered tibble is stored in `degree`.
degree <- read_csv(
  "https://raw.githubusercontent.com/AnnaMoy/Data-607/main/majors-list.csv"
) %>%
  filter(str_detect(Major, "DATA") | str_detect(Major, "STATISTICS"))

# Show the matching majors.
degree
## # A tibble: 3 × 3
##   FOD1P Major                                         Major_Category         
##   <chr> <chr>                                         <chr>                  
## 1 6212  MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business               
## 2 2101  COMPUTER PROGRAMMING AND DATA PROCESSING      Computers & Mathematics
## 3 3702  STATISTICS AND DECISION SCIENCE               Computers & Mathematics
  1. Write code that transforms the data below:
    [1] “bell pepper” “bilberry” “blackberry” “blood orange”
    [5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
    [9] “elderberry” “lime” “lychee” “mulberry”
    [13] “olive” “salal berry”

Into a format like this: c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)

# Raw console printout of the fruit vector, kept as one string. The values are
# recovered by pattern matching rather than by character position, so the
# spacing and line breaks of the printout do not affect the result.
berry <- '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"

[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"

[9] "elderberry"   "lime"         "lychee"       "mulberry"

[13] "olive"        "salal berry"'

# Extract every double-quoted token, quotes included. The "[n]" index markers
# are not quoted, so they are skipped.
fruits <- regmatches(berry, gregexpr('"[^"]+"', berry))[[1]]

# Join the quoted names into the text of a c(...) call.
berry2 <- paste0("c(", paste(fruits, collapse = ", "), ")")

writeLines(berry2)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
  1. Describe, in words, what the expressions will match:
# "(.)\\1\\1": one character followed by two more copies of itself, i.e. the
# same character three times in a row.
data <- "aaa"
str_view(string = data, pattern = "(.)\\1\\1")
## [1] │ <aaa>

# "(.)(.)\\2\\1": two characters followed by the same two characters in
# reverse order, giving a four-character mirror such as "abba".
data <- "abba"
str_view(string = data, pattern = "(.)(.)\\2\\1")
## [1] │ <abba>

The pattern `(.)(.)\\2\\1` matches any two characters (which may be the same or different) followed immediately by those same two characters in reverse order, as in "abba".

# "(..)\\1": any two characters followed immediately by the same two
# characters again, as in "baba".
data <- "baba"
str_view(string = data, pattern = "(..)\\1")
## [1] │ <baba>

# "(.).\\1.\\1": a character, any one character, the first character again,
# any one character, and the first character a third time, as in "abaca".
data <- "abaca"
str_view(string = data, pattern = "(.).\\1.\\1")
## [1] │ <abaca>

# "(.)(.)(.).*\\3\\2\\1": three characters, then zero or more of any
# characters, then the same three characters in reverse order.
data <- "abceecba"
str_view(string = data, pattern = "(.)(.)(.).*\\3\\2\\1")
## [1] │ <abceecba>
  1. Construct regular expressions to match words that:
# Strings whose first and last characters are the same: "^(.)" captures the
# first character and "\\1$" requires it again at the very end.
# NOTE(review): a one-character string cannot match, because the
# back-reference needs a second character; confirm whether that case matters.
data <- c("TOT", "TOTO", "TOCT")
str_view(string = data, pattern = "^(.).*\\1$")
## [1] │ <TOT>
## [3] │ <TOCT>

# Strings containing a pair of characters that occurs again later:
# "(..)" captures the pair and "\\1" requires it again after any gap.
data <- c("chch", "church")
str_view(string = data, pattern = "(..).*\\1")
## [1] │ <chch>
## [2] │ <church>

# Strings in which one character appears at least three times: the captured
# character must be found twice more, with anything in between.
data <- c("pupap", "elevven")
str_view(string = data, pattern = "(.).*\\1.*\\1")
## [1] │ <pupap>
## [2] │ <elevve>n