library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr) # For better table display
library(stringr) # For string manipulation and regex testing
# Location of the majors list (CSV hosted on GitHub).
url <- "https://raw.githubusercontent.com/sheriannmclarty/Identifying-Majors/refs/heads/main/majors-list.csv"

# Read the CSV, keeping text columns as character vectors rather than factors.
majors <- read.csv(file = url, stringsAsFactors = FALSE)

# Keep only the rows whose Major name contains "DATA" or "STATISTICS",
# ignoring letter case.
filtered_majors <- filter(
  majors,
  str_detect(Major, regex("DATA|STATISTICS", ignore_case = TRUE))
)

# Render the matching majors as a formatted table.
kable(filtered_majors)
## | FOD1P | Major | Major_Category |
## |---|---|---|
## | 6212 | MANAGEMENT INFORMATION SYSTEMS AND STATISTICS | Business |
## | 2101 | COMPUTER PROGRAMMING AND DATA PROCESSING | Computers & Mathematics |
## | 3702 | STATISTICS AND DECISION SCIENCE | Computers & Mathematics |
# Fruit names used as test strings for the regular-expression examples.
# Element order is kept fixed so positions in printed output stay stable.
fruits <- c(
  "bell pepper",
  "bilberry",
  "blackberry",
  "blood orange",
  "blueberry",
  "cantaloupe",
  "chili pepper",
  "cloudberry",
  "elderberry",
  "lime",
  "lychee",
  "mulberry",
  "olive",
  "salal berry"
)

# Echo the complete vector.
print(fruits)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
#3. Descriptions of Regular Expressions
# Visualizing regex matches
library(stringr)
library(ggplot2)
# Use the fruit vector as the test cases for each back-reference pattern.
test_words <- fruits

# str_view_all() was deprecated in stringr 1.5.0 in favour of str_view(),
# which now highlights every match. `match = NA` is passed explicitly so that
# all elements are listed, including those without a match, exactly as
# str_view_all() did; the str_view() default would show matching elements only.

# "(.)\\1\\1": one character followed by itself twice, e.g. "aaa".
matches_regex1 <- str_view(test_words, "(.)\\1\\1", match = NA)
# "(.)(.)\\2\\1": two characters followed by the same two reversed, e.g. "abba".
matches_regex2 <- str_view(test_words, "(.)(.)\\2\\1", match = NA)
# "(..)\\1": a two-character sequence immediately repeated, e.g. "abab".
matches_regex3 <- str_view(test_words, "(..)\\1", match = NA)
# "(.).\\1.\\1": one character at three positions, each separated by exactly
# one arbitrary character, e.g. "abaca".
matches_regex4 <- str_view(test_words, "(.).\\1.\\1", match = NA)
# "(.)(.)(.).*\\3\\2\\1": three characters, any text, then the same three
# characters in reverse order, e.g. "abc...cba".
matches_regex5 <- str_view(test_words, "(.)(.)(.).*\\3\\2\\1", match = NA)

# Print every view under a descriptive label.
list(
  "Triple Repeats" = matches_regex1,
  "Four-Char Palindrome" = matches_regex2,
  "Repeated Pair" = matches_regex3,
  "Character Repeat w/ One in Between" = matches_regex4,
  "Reversed Sequence" = matches_regex5
)
## $`Triple Repeats`
## [1] │ bell pepper
## [2] │ bilberry
## [3] │ blackberry
## [4] │ blood orange
## [5] │ blueberry
## [6] │ cantaloupe
## [7] │ chili pepper
## [8] │ cloudberry
## [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
##
## $`Four-Char Palindrome`
## [1] │ bell p<eppe>r
## [2] │ bilberry
## [3] │ blackberry
## [4] │ blood orange
## [5] │ blueberry
## [6] │ cantaloupe
## [7] │ chili p<eppe>r
## [8] │ cloudberry
## [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
##
## $`Repeated Pair`
## [1] │ bell pepper
## [2] │ bilberry
## [3] │ blackberry
## [4] │ blood orange
## [5] │ blueberry
## [6] │ cantaloupe
## [7] │ chili pepper
## [8] │ cloudberry
## [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ s<alal> berry
##
## $`Character Repeat w/ One in Between`
## [1] │ bell pepper
## [2] │ bilberry
## [3] │ blackberry
## [4] │ blood orange
## [5] │ blueberry
## [6] │ cantaloupe
## [7] │ chili pepper
## [8] │ cloudberry
## [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
##
## $`Reversed Sequence`
## [1] │ bell pepper
## [2] │ bilberry
## [3] │ blackberry
## [4] │ blood orange
## [5] │ blueberry
## [6] │ cantaloupe
## [7] │ chili pepper
## [8] │ cloudberry
## [9] │ elderberry
## [10] │ lime
## [11] │ lychee
## [12] │ mulberry
## [13] │ olive
## [14] │ salal berry
# Apply three more back-reference patterns to the fruit names. str_view() is
# called with its default `match` setting, so only matching elements are shown.

# Starts and ends with the same character. The earlier pattern "^(.)\\1$" only
# matched two-character strings made of one repeated character; the optional
# group here allows any text between the first character and its repeat at the
# end of the string, and also accepts a single-character string.
matches_regex1 <- str_view(fruits, "^(.)(.*\\1)?$")
# Some two-character sequence appears again later in the string.
matches_regex2 <- str_view(fruits, "(..).*\\1")
# One letter appears at least three times. The capture group is limited to
# letters so that a space or punctuation mark cannot count as the "letter".
matches_regex3 <- str_view(fruits, "([[:alpha:]]).*?\\1.*?\\1")

# Print every view under a descriptive label.
list(
  "Start & End Same" = matches_regex1,
  "Repeated Pair" = matches_regex2,
  "Letter Repeats 3x" = matches_regex3
)
## $`Start & End Same`
##
## $`Repeated Pair`
## [1] │ bell <peppe>r
## [7] │ chili <peppe>r
## [9] │ eld<erber>ry
## [14] │ s<alal> berry
##
## $`Letter Repeats 3x`
## [1] │ b<ell peppe>r
## [4] │ bl<ood o>range
## [7] │ chili <pepp>er
## [9] │ <elderbe>rry