#1 Provide code that identifies the majors whose names contain either "DATA" or "STATISTICS"

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the college majors table directly from the hosted CSV file.
college_majors <- read.csv(
  file = "https://raw.githubusercontent.com/datanerddhanya/DATA607/main/COLLEGE_MAJORS.csv"
)

# Keep the rows whose Major contains DATA or STATISTICS, ignoring case.
# A logical mask from grepl() selects the same rows, in the same order,
# as the integer positions returned by grep().
college_majors_data_statistics <- college_majors[
  grepl("DATA|STATISTICS", college_majors$Major, ignore.case = TRUE),
]
head(college_majors_data_statistics)
##    Code                                         Major          Major_Category
## 44 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 52 2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 59 3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

#2 Write code that transforms the data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Input for exercise 2: the fruit names as a plain character vector.
fruit_list <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange",
  "blueberry", "cantaloupe", "chili pepper", "cloudberry",
  "elderberry", "lime", "lychee", "mulberry",
  "olive", "salal berry"
)
# Show the untransformed vector, one element per line.
str_view(string = fruit_list)
##  [1] │ bell pepper
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili pepper
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
# Build the text of an R character-vector literal from fruit_list:
# wrap each element in double quotes, join the quoted elements with ", ",
# and enclose the result in c( ... ).
# The space belongs to the separator rather than to each element, so no
# stray blank follows the opening "c(".
fruit_newlist <- paste0(
  "c(",
  paste0("\"", fruit_list, "\"", collapse = ", "),
  ")"
)
str_view(fruit_newlist)
## [1] │ c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

#3 Describe, in words, what these expressions will match:

(.)\1\1

"(.)(.)\\2\\1"

(..)\1

"(.).\\1.\\1"

"(.)(.)(.).*\\3\\2\\1"

# Expression 1: the regex (.)\1\1 has to be written "(.)\\1\\1" in R source.
# With a single backslash, "\1" is the octal escape for the control character
# U+0001, so the pattern would contain no back-reference and never match.
str_view(words, "(.)\\1\\1")
# Expression 2: two characters followed by the same two in reverse order.
str_view(words, "(.)(.)\\2\\1")
##  [19] │ after<noon>
##  [43] │ <appa>rent
##  [53] │ <arra>nge
## [107] │ b<otto>m
## [112] │ br<illi>ant
## [174] │ c<ommo>n
## [230] │ d<iffi>cult
## [259] │ <effe>ct
## [329] │ f<ollo>w
## [422] │ in<deed>
## [470] │ l<ette>r
## [521] │ m<illi>on
## [581] │ <oppo>rtunity
## [582] │ <oppo>se
## [877] │ tom<orro>w
# Expression 3: the regex (..)\1 has to be written "(..)\\1" in R source.
# With a single backslash, "\1" is the octal escape for the control character
# U+0001 rather than a back-reference to the first group.
str_view(words, "(..)\\1")
## [696] │ r<emem>ber
# Expression 4: one character occurring three times, each occurrence
# separated from the next by exactly one arbitrary character.
str_view(words, "(.).\\1.\\1")
## [265] │ <eleve>n
# Expression 5: three characters, anything in between, then the same three
# characters in reverse order.
str_view(words, "(.)(.)(.).*\\3\\2\\1")
## [598] │ <paragrap>h
  1. `(.)\1\1` matches any single character that appears three times in a row, e.g. "aaa".
  2. `"(.)(.)\\2\\1"` matches two characters immediately followed by the same two characters in reverse order, i.e. a four-character palindrome such as "otto" in "bottom".
  3. `(..)\1` matches any two characters immediately followed by the same two characters again, e.g. "emem" in "remember".
  4. `"(.).\\1.\\1"` matches a character that occurs three times, with exactly one arbitrary character between each pair of consecutive occurrences, e.g. "eleve" in "eleven".
  5. `"(.)(.)(.).*\\3\\2\\1"` matches three characters, followed by zero or more arbitrary characters, followed by the same three characters in reverse order, e.g. "paragrap" in "paragraph".

#4 Construct regular expressions to match words that: Start and end with the same character. Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.) Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

library(dplyr)
# Start and end with the same character.
# The tail "(.*\\1)?" is optional so that a one-character word such as "a",
# whose first character is also its last, is matched too. The earlier form
# "^(.).*\\1$" required at least two characters and skipped it.
str_view(words, "^(.)(.*\\1)?$")
##   [1] │ <a>
##  [36] │ <america>
##  [49] │ <area>
## [209] │ <dad>
## [213] │ <dead>
## [223] │ <depend>
## [258] │ <educate>
## [266] │ <else>
## [268] │ <encourage>
## [270] │ <engine>
## [278] │ <europe>
## [283] │ <evidence>
## [285] │ <example>
## [287] │ <excuse>
## [288] │ <exercise>
## [291] │ <expense>
## [292] │ <experience>
## [296] │ <eye>
## [386] │ <health>
## [394] │ <high>
## ... and 17 more
# Contain a repeated pair of letters: capture any two characters, allow
# anything in between, then require the same two characters again.
str_view(string = words, pattern = "(..).*\\1")
##  [48] │ ap<propr>iate
## [152] │ <church>
## [181] │ c<ondition>
## [217] │ <decide>
## [275] │ <environmen>t
## [487] │ l<ondon>
## [598] │ pa<ragra>ph
## [603] │ p<articular>
## [617] │ <photograph>
## [638] │ p<repare>
## [641] │ p<ressure>
## [696] │ r<emem>ber
## [698] │ <repre>sent
## [699] │ <require>
## [739] │ <sense>
## [858] │ the<refore>
## [903] │ u<nderstand>
## [946] │ w<hethe>r
# Contain one letter in at least three places: capture a character and
# require it twice more, with anything allowed between the occurrences.
str_view(string = words, pattern = "(.).*\\1.*\\1")
##  [48] │ a<pprop>riate
##  [62] │ <availa>ble
##  [86] │ b<elieve>
##  [90] │ b<etwee>n
## [119] │ bu<siness>
## [221] │ d<egree>
## [229] │ diff<erence>
## [233] │ di<scuss>
## [265] │ <eleve>n
## [275] │ e<nvironmen>t
## [283] │ <evidence>
## [288] │ <exercise>
## [291] │ <expense>
## [292] │ <experience>
## [423] │ <indivi>dual
## [598] │ p<aragra>ph
## [684] │ r<eceive>
## [696] │ r<emembe>r
## [698] │ r<eprese>nt
## [845] │ t<elephone>
## ... and 2 more