library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(dplyr)

1 FiveThirtyEight College Majors dataset

# Download the FiveThirtyEight majors list and keep only the rows whose
# upper-cased Major name contains "DATA" or "STATISTICS".
majors_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
college_majors <- read.csv(majors_url)
filter_majors <- filter(
  college_majors,
  str_detect(toupper(Major), "DATA|STATISTICS")
)
print(filter_majors)
##   FOD1P                                         Major          Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 2  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

2 building codes

Define fruit categories

# Fruit names, split into four batches.
fruits_A <- c(
  "bell pepper", "bilberry",
  "blackberry", "blood orange"
)
fruits_B <- c(
  "blueberry", "cantaloupe",
  "chili pepper", "cloudberry"
)
fruits_C <- c(
  "elderberry", "lime",
  "lychee", "mulberry"
)
fruits_D <- c(
  "olive", "salal berry"
)

# Flatten the four batches, in order, into one unnamed character vector.
all_fruits <- unlist(
  list(fruits_A, fruits_B, fruits_C, fruits_D),
  use.names = FALSE
)

print(all_fruits)
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"
# Top-level expression: auto-prints the number of fruits.
length(all_fruits)
## [1] 14

3 Describe, in words, what these expressions will match:

  1. “(.)\1\1” matches any single character repeated three times in a row, e.g. “111” or the “222” in “12225”.
# Demo for "(.)\\1\\1": one character captured, then repeated twice more,
# i.e. the same character three times in a row.
numbers <- c("12225", "111", "39997")
words_mix <- c("wooopy neeed a Glaaase of wattter"  )

# Each mask is used right after it is computed, so `numbers` is subset with
# its own per-element mask and never with the length-1 mask built from
# `words_mix` (a recycled TRUE would select every number, match or not).
matches <- str_detect(numbers, "(.)\\1\\1")
print(numbers[matches])
## [1] "12225" "111"   "39997"

# `matches` ends up holding the words_mix mask.
matches <- str_detect(words_mix, "(.)\\1\\1")
print(words_mix[matches])
## [1] "wooopy neeed a Glaaase of wattter"
  1. “(.)(.)\2\1” matches two characters followed by the same two characters in reverse order (a four-character palindrome such as “abba”), e.g. the “issi” in “sissis”.
# Demo for a two-character group followed by its mirror image (e.g. "abba").
mirrored_pair <- "(.)(.)\\2\\1"
words <- c(
  "CAAC",
  "sissis a list",
  "clean and clear",
  "1211213"
)
# Logical mask: TRUE where the element contains the pattern anywhere.
matches <- grepl(pattern = mirrored_pair, x = words)
print(words[matches])
## [1] "CAAC"          "sissis a list" "1211213"
  1. (..)\1

This matches any pair of characters that is immediately repeated, e.g. the “coco” in “hot coco” or the “mumu” in “blue mumu”.

# Demo for a two-character group that is immediately repeated (e.g. "coco").
doubled_pair <- "(..)\\1"
words <- c(
  " hot coco",
  "cucured",
  "blue mumu"
)
# Logical mask: TRUE where the element contains the pattern anywhere.
matches <- grepl(pattern = doubled_pair, x = words)
print(words[matches])
## [1] " hot coco" "cucured"   "blue mumu"
  1. “(.).\1.\1” matches a character that appears three times with exactly one arbitrary character between each occurrence, e.g. “.a.s.” or “abaca”.
# Demo for one character occurring three times, each occurrence separated
# by exactly one arbitrary character (e.g. "abaca").
alternating_char <- "(.).\\1.\\1"
words <- c(
  "c.a.s.cas.de.",
  ".p.l.o.l.p.l;o",
  "in.th."
)
# Logical mask: TRUE where the element contains the pattern anywhere.
matches <- grepl(pattern = alternating_char, x = words)
print(words[matches])
## [1] "c.a.s.cas.de."  ".p.l.o.l.p.l;o"
  1. “(.)(.)(.).*\3\2\1” matches three characters, followed by zero or more of any characters, followed by the same three characters in reverse order, e.g. “abc...cba”.
# Demo for three characters, then any run of characters, then the same
# three characters in reverse order.
mirrored_triple <- "(.)(.)(.).*\\3\\2\\1"
words <- c(
  "cacasse caca,"
)
# Logical mask: TRUE where the element contains the pattern anywhere.
matches <- grepl(pattern = mirrored_triple, x = words)
print(words[matches])
## [1] "cacasse caca,"