1 FiveThirtyEight College Majors dataset

# Read the FiveThirtyEight majors list directly from its GitHub URL.
college_majors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)

# Keep only the majors whose upper-cased name contains DATA or STATISTICS.
filter_majors <- filter(
  college_majors,
  str_detect(toupper(Major), "DATA|STATISTICS")
)
print(filter_majors)

##   FOD1P                                         Major          Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 2  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

Define fruit categories

# Four small groups of fruit names, listed in the order they should appear.
fruits_A <- c(
  "bell pepper", "bilberry",
  "blackberry", "blood orange"
)
fruits_B <- c(
  "blueberry", "cantaloupe",
  "chili pepper", "cloudberry"
)
fruits_C <- c(
  "elderberry", "lime",
  "lychee", "mulberry"
)
fruits_D <- c(
  "olive", "salal berry"
)

# Concatenate the groups, A through D, into a single character vector.
all_fruits <- c(fruits_A, fruits_B, fruits_C, fruits_D)

print(all_fruits)

##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"

# Report how many fruit names the combined vector holds.
print(length(all_fruits))

## [1] 14

3 Describe, in words, what these expressions will match:

(.)\1\1 (written "(.)\\1\\1" as an R string) matches any single character repeated three times in a row, e.g. "111" or the "222" in "12225".

# Demo inputs for the "(.)\1\1" pattern: one character captured by the
# group and then repeated twice more (three identical characters in a row).
numbers <- c("12225", "111", "39997")
words_mix <- c("wooopy neeed a Glaaase of wattter")

# Keep one logical mask per input vector. Sharing a single `matches`
# variable let the words_mix result overwrite the numbers result, so
# `numbers` ended up being subset with a recycled length-1 mask that was
# computed from a different vector.
# grepl() is used here to match the other regex examples in this file.
number_matches <- grepl("(.)\\1\\1", numbers)
word_matches <- grepl("(.)\\1\\1", words_mix)

print(numbers[number_matches])

## [1] "12225" "111"   "39997"

print(words_mix[word_matches])

## [1] "wooopy neeed a Glaaase of wattter"

“(.)(.)\2\1” This regular expression matches a pair of characters immediately followed by the same pair in reverse order, i.e. a four-character palindrome such as "abba" or the "AAC" core of "CAAC".

# Candidate strings for the "x y y x" pattern (two characters followed by
# the same two characters in reverse order).
words <- c(
  "CAAC",
  "sissis a list",
  "clean and clear",
  "1211213"
)

# TRUE for every element that contains such a four-character mirror.
matches <- grepl(pattern = "(.)(.)\\2\\1", x = words)

print(words[matches])

## [1] "CAAC"          "sissis a list" "1211213"

(..)\1

This expression matches any two characters (a pair) that are immediately repeated, e.g. the "coco" in "hot coco" or the "mumu" in "blue mumu".

# Candidate strings for the repeated-pair pattern: any two characters
# followed straight away by the same two characters.
words <- c(
  " hot coco",
  "cucured",
  "blue mumu"
)

# TRUE for every element that contains a doubled two-character pair.
matches <- grepl(pattern = "(..)\\1", x = words)

print(words[matches])

## [1] " hot coco" "cucured"   "blue mumu"

“(.).\1.\1” This expression matches a character that occurs three times with exactly one arbitrary character between each occurrence, e.g. "abaca" or ".a.s.".

# Candidate strings for the alternating pattern: the captured character
# appears three times, each separated by exactly one other character.
words <- c(
  "c.a.s.cas.de.",
  ".p.l.o.l.p.l;o",
  "in.th."
)

# TRUE for every element that contains such a five-character run.
matches <- grepl(pattern = "(.).\\1.\\1", x = words)

print(words[matches])

## [1] "c.a.s.cas.de."  ".p.l.o.l.p.l;o"

“(.)(.)(.).*\3\2\1” This expression matches any three characters, followed by zero or more arbitrary characters, followed by the same three characters in reverse order, e.g. "abcxyzcba".

# Single candidate string for the reversed-triple pattern: three captured
# characters, any filler, then those three characters in reverse order.
words <- c(
  "cacasse caca,"
)

# TRUE when the string contains the triple and, later, its mirror image.
matches <- grepl(pattern = "(.)(.)(.).*\\3\\2\\1", x = words)

print(words[matches])

## [1] "cacasse caca,"

Assignment 3

Woodelyne Durosier

2025-02-16

1 FiveThirtyEight College Majors dataset

2 building codes

Define fruit categories

3 Describe, in words, what these expressions will match: