library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(readr)

Normalization

# Denormalized source data: one row per sale, holding customer, car,
# salesperson, and transaction fields side by side.
# NOTE(review): "Mazeda" is probably a typo for "Mazda"; it is kept as-is so
# the printed output below stays in sync.
car_sales <- data.frame(
  # Customer
  customer_name = c("John", "Sharon", "Shirley", "Daniel", "Jay"),
  customer_phone = c(
    "111-2222", "111-3333", "111-4444", "111-5555", "111-6666"
  ),
  # Car (car_year is stored as character, not numeric)
  car_vin = c("VIN001", "VIN002", "VIN003", "VIN004", "VIN005"),
  car_make = c("Mazeda", "Honda", "Lexus", "Tesla", "Benz"),
  car_model = c("CX-5", "CRV", "RX-350", "Model-Y", "GLE"),
  car_year = c("2020", "2019", "2024", "2023", "2023"),
  # Sale
  salesperson_name = c(
    "Evan T.", "Fiona W.", "George K.", "Jessica W.", "Paige E."
  ),
  # All five sales share the same date.
  sale_date = rep(as.Date("2025-01-28"), times = 5),
  sale_price = c(20000, 18000, 50000, 35000, 56000)
)
print(car_sales)
##   customer_name customer_phone car_vin car_make car_model car_year
## 1          John       111-2222  VIN001   Mazeda      CX-5     2020
## 2        Sharon       111-3333  VIN002    Honda       CRV     2019
## 3       Shirley       111-4444  VIN003    Lexus    RX-350     2024
## 4        Daniel       111-5555  VIN004    Tesla   Model-Y     2023
## 5           Jay       111-6666  VIN005     Benz       GLE     2023
##   salesperson_name  sale_date sale_price
## 1          Evan T. 2025-01-28      20000
## 2         Fiona W. 2025-01-28      18000
## 3        George K. 2025-01-28      50000
## 4       Jessica W. 2025-01-28      35000
## 5         Paige E. 2025-01-28      56000
# Customer table: the unique (name, phone) pairs found in the sales records.
customers <- distinct(car_sales, customer_name, customer_phone)

print(customers)
##   customer_name customer_phone
## 1          John       111-2222
## 2        Sharon       111-3333
## 3       Shirley       111-4444
## 4        Daniel       111-5555
## 5           Jay       111-6666
# Car table: one row per distinct (VIN, make, model, year) combination.
cars <- distinct(car_sales, car_vin, car_make, car_model, car_year)

print(cars)
##   car_vin car_make car_model car_year
## 1  VIN001   Mazeda      CX-5     2020
## 2  VIN002    Honda       CRV     2019
## 3  VIN003    Lexus    RX-350     2024
## 4  VIN004    Tesla   Model-Y     2023
## 5  VIN005     Benz       GLE     2023
# Sales table: one row per transaction, keeping the customer fields, the car's
# VIN, the salesperson, and the sale date and price. No de-duplication is
# applied here.
# NOTE(review): customer_phone is also held in `customers`, so it is stored
# twice; consider whether the sales table should carry it.
sales <- select(
  car_sales,
  customer_name,
  customer_phone,
  car_vin,
  salesperson_name,
  sale_date,
  sale_price
)

print(sales)
##   customer_name customer_phone car_vin salesperson_name  sale_date sale_price
## 1          John       111-2222  VIN001          Evan T. 2025-01-28      20000
## 2        Sharon       111-3333  VIN002         Fiona W. 2025-01-28      18000
## 3       Shirley       111-4444  VIN003        George K. 2025-01-28      50000
## 4        Daniel       111-5555  VIN004       Jessica W. 2025-01-28      35000
## 5           Jay       111-6666  VIN005         Paige E. 2025-01-28      56000

Character Manipulation

# Load the majors list from GitHub (requires network access).
majors_data <- read.csv(
  "https://raw.githubusercontent.com/JaydeeJan/Data-607-Assignment-3/refs/heads/main/majors-list.csv"
)

# Keep majors whose name contains "DATA" or "STATISTICS", in any letter case.
data_stats_majors <- filter(
  majors_data,
  str_detect(Major, regex("DATA|STATISTICS", ignore_case = TRUE))
)

print(data_stats_majors)
##   FOD1P                                         Major          Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 2  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

Describe, in words, what these expressions will match:

(.)\1\1

1. (.) - will match any single character in first group

2. \1 - refers to the match contained in the first parenthesis

3. \1\1 - the captured character repeated two more times, so the whole expression matches the same character three times in a row (e.g. “aaa”).

“(.)(.)\2\1”

1. (.)(.) - will match any single character in first group and second group

2. \2 refers to the match contained in the second parenthesis

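3. \1 refers to the match contained in the first parenthesis. Together, the expression matches two characters followed by the same two characters in reverse order (e.g. “abba”).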

(..)\1

1. (..) - match with two characters

2. \1 - refers to the match contained in the first parenthesis. Together, the expression matches any two characters immediately repeated (e.g. “abab”).

“(.).\1.\1”

1. (.) - will match any single character in first group

2. . - match any character

3. \1 - refers to the match contained in the first parenthesis

4. . - match any character

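5. \1 - refers to the match contained in the first parenthesis. Together, the expression matches a character that occurs three times with exactly one arbitrary character between each occurrence (e.g. “abaca”).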

“(.)(.)(.).*\3\2\1”

1. (.) - will match any single character in first group

2. (.) - will match any single character in second group

3. (.) - will match any single character in third group

4. .* - will match any sequence of characters

5. \3 - refers to the match contained in the third parenthesis

6. \2 - refers to the match contained in the second parenthesis

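7. \1 - refers to the match contained in the first parenthesis. Together, the expression matches three characters, then any sequence of characters (possibly empty), then the same three characters in reverse order (e.g. “abcxyzcba”).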

Construct regular expressions to match words that:

Start and end with the same character.

^(.)(.*\1)?$

Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)

“(..).*\1”

Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

“(.).*\1.*\1”