1. Identifying Majors with “DATA” or “STATISTICS”

Load the necessary libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr) # For better table display
library(stringr) # For string manipulation and regex testing

Load the dataset

# Address of the majors list, stored as a CSV file on GitHub.
url <- "https://raw.githubusercontent.com/sheriannmclarty/Identifying-Majors/refs/heads/main/majors-list.csv"
# Read the CSV into a data frame, keeping text columns as character vectors.
majors <- utils::read.csv(file = url, stringsAsFactors = FALSE)

Filter for majors that contain “DATA” or “STATISTICS”

# Keep only the rows whose `Major` text contains "DATA" or "STATISTICS"
# (case-insensitive match).
filtered_majors <- dplyr::filter(
  majors,
  grepl(pattern = "DATA|STATISTICS", x = Major, ignore.case = TRUE)
)

Display the result

# Render the filtered majors as a formatted table.
knitr::kable(x = filtered_majors)
FOD1P Major Major_Category
6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics

2. Transforming the List of Fruits into a Vector

Original list of fruits

# Character vector of the fourteen fruit names, one element per line.
fruits <- c(
  "bell pepper",
  "bilberry",
  "blackberry",
  "blood orange",
  "blueberry",
  "cantaloupe",
  "chili pepper",
  "cloudberry",
  "elderberry",
  "lime",
  "lychee",
  "mulberry",
  "olive",
  "salal berry"
)

Printing as a vector

# Show the character vector in the console.
print(x = fruits)
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"

3. Descriptions of Regular Expressions

# Visualizing regex matches
library(stringr)
library(ggplot2)
# Reuse the `fruits` character vector as the test strings for the regex
# patterns applied below.
test_words <- fruits

Apply regex and show matches

# `str_view_all()` was deprecated in stringr 1.5.0. `str_view()` with
# `match = NA` is its direct replacement: every input string is listed,
# whether or not it matches, and each match is wrapped in <...>.

# One character repeated three times in a row (e.g. "aaa").
matches_regex1 <- str_view(test_words, "(.)\\1\\1", match = NA)
# Two characters followed by the same two in reverse order (e.g. "eppe").
matches_regex2 <- str_view(test_words, "(.)(.)\\2\\1", match = NA)
# A pair of characters immediately repeated (e.g. "alal").
matches_regex3 <- str_view(test_words, "(..)\\1", match = NA)
# The same character three times, with any single character between each
# occurrence (e.g. "abaca").
matches_regex4 <- str_view(test_words, "(.).\\1.\\1", match = NA)
# Three characters, then anything, then the same three characters reversed
# (e.g. "abc...cba").
matches_regex5 <- str_view(test_words, "(.)(.)(.).*\\3\\2\\1", match = NA)

# Collect the five views in one named list so each result prints under a
# descriptive heading.
setNames(
  list(
    matches_regex1,
    matches_regex2,
    matches_regex3,
    matches_regex4,
    matches_regex5
  ),
  c(
    "Triple Repeats",
    "Four-Char Palindrome",
    "Repeated Pair",
    "Character Repeat w/ One in Between",
    "Reversed Sequence"
  )
)
## $`Triple Repeats`
##  [1] │ bell pepper
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili pepper
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
## 
## $`Four-Char Palindrome`
##  [1] │ bell p<eppe>r
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili p<eppe>r
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
## 
## $`Repeated Pair`
##  [1] │ bell pepper
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili pepper
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ s<alal> berry
## 
## $`Character Repeat w/ One in Between`
##  [1] │ bell pepper
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili pepper
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
## 
## $`Reversed Sequence`
##  [1] │ bell pepper
##  [2] │ bilberry
##  [3] │ blackberry
##  [4] │ blood orange
##  [5] │ blueberry
##  [6] │ cantaloupe
##  [7] │ chili pepper
##  [8] │ cloudberry
##  [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry

4. Constructing Regular Expressions

# Apply each constructed regex to `fruits`; `str_view()` lists only the
# strings that match and wraps the matched text in <...>.

# Strings that start and end with the same character. The optional group
# `(.*\\1)?` lets a one-character string match too. The earlier pattern
# "^(.)\\1$" only matched two-character strings such as "aa".
matches_regex1 <- str_view(fruits, "^(.)(.*\\1)?$")
# Strings in which some pair of characters appears again later on.
matches_regex2 <- str_view(fruits, "(..).*\\1")
# Strings in which one character occurs at least three times.
matches_regex3 <- str_view(fruits, "(.).*?\\1.*?\\1")

# Collect the three views in one named list so each result prints under a
# descriptive heading.
setNames(
  list(matches_regex1, matches_regex2, matches_regex3),
  c("Start & End Same", "Repeated Pair", "Letter Repeats 3x")
)
## $`Start & End Same`
## 
## $`Repeated Pair`
##  [1] │ bell <peppe>r
##  [7] │ chili <peppe>r
##  [9] │ eld<erber>ry
## [14] │ s<alal> berry
## 
## $`Letter Repeats 3x`
## [1] │ b<ell peppe>r
## [4] │ bl<ood o>range
## [7] │ chili <pepp>er
## [9] │ <elderbe>rry