library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(readr)
Normalization
# Build the flat (denormalized) car-sales data set: one row per sale, holding
# the customer, vehicle, salesperson, and transaction details side by side.
# NOTE(review): "Mazeda" looks like a typo for "Mazda"; it is left unchanged
# here so the value stays consistent with the recorded output.
car_sales <- data.frame(
  customer_name = c("John", "Sharon", "Shirley", "Daniel", "Jay"),
  customer_phone = c(
    "111-2222", "111-3333", "111-4444", "111-5555", "111-6666"
  ),
  car_vin = c("VIN001", "VIN002", "VIN003", "VIN004", "VIN005"),
  car_make = c("Mazeda", "Honda", "Lexus", "Tesla", "Benz"),
  car_model = c("CX-5", "CRV", "RX-350", "Model-Y", "GLE"),
  # Model years are stored as character strings, not numbers.
  car_year = c("2020", "2019", "2024", "2023", "2023"),
  salesperson_name = c(
    "Evan T.", "Fiona W.", "George K.", "Jessica W.", "Paige E."
  ),
  # Every sale in this sample happened on the same day.
  sale_date = rep(as.Date("2025-01-28"), 5),
  sale_price = c(20000, 18000, 50000, 35000, 56000)
)

# Show the full table before it is split up.
print(car_sales)
## customer_name customer_phone car_vin car_make car_model car_year
## 1 John 111-2222 VIN001 Mazeda CX-5 2020
## 2 Sharon 111-3333 VIN002 Honda CRV 2019
## 3 Shirley 111-4444 VIN003 Lexus RX-350 2024
## 4 Daniel 111-5555 VIN004 Tesla Model-Y 2023
## 5 Jay 111-6666 VIN005 Benz GLE 2023
## salesperson_name sale_date sale_price
## 1 Evan T. 2025-01-28 20000
## 2 Fiona W. 2025-01-28 18000
## 3 George K. 2025-01-28 50000
## 4 Jessica W. 2025-01-28 35000
## 5 Paige E. 2025-01-28 56000
# Customer table: one row per unique (name, phone) combination found in
# car_sales. Passing the columns to distinct() keeps only those columns.
customers <- distinct(car_sales, customer_name, customer_phone)
print(customers)
## customer_name customer_phone
## 1 John 111-2222
## 2 Sharon 111-3333
## 3 Shirley 111-4444
## 4 Daniel 111-5555
## 5 Jay 111-6666
# Cars table: one row per unique vehicle description (VIN, make, model, year)
# found in car_sales. Passing the columns to distinct() keeps only those
# columns.
cars <- distinct(car_sales, car_vin, car_make, car_model, car_year)
print(cars)
## car_vin car_make car_model car_year
## 1 VIN001 Mazeda CX-5 2020
## 2 VIN002 Honda CRV 2019
## 3 VIN003 Lexus RX-350 2024
## 4 VIN004 Tesla Model-Y 2023
## 5 VIN005 Benz GLE 2023
# Sales table: the transaction-level columns of car_sales, one row per sale.
# The vehicle is referenced through car_vin only.
# NOTE(review): customer_name and customer_phone are both repeated here as
# well as in `customers`; a single customer key would avoid the duplication.
# Confirm whether that is intended.
sales <- select(
  car_sales,
  customer_name,
  customer_phone,
  car_vin,
  salesperson_name,
  sale_date,
  sale_price
)
print(sales)
## customer_name customer_phone car_vin salesperson_name sale_date sale_price
## 1 John 111-2222 VIN001 Evan T. 2025-01-28 20000
## 2 Sharon 111-3333 VIN002 Fiona W. 2025-01-28 18000
## 3 Shirley 111-4444 VIN003 George K. 2025-01-28 50000
## 4 Daniel 111-5555 VIN004 Jessica W. 2025-01-28 35000
## 5 Jay 111-6666 VIN005 Paige E. 2025-01-28 56000
Character Manipulation
# Download the majors list as a data frame.
majors_data <- read.csv(
  "https://raw.githubusercontent.com/JaydeeJan/Data-607-Assignment-3/refs/heads/main/majors-list.csv"
)

# Keep only the rows whose Major column contains "DATA" or "STATISTICS",
# ignoring letter case.
data_stats_majors <- filter(
  majors_data,
  str_detect(Major, regex("DATA|STATISTICS", ignore_case = TRUE))
)
print(data_stats_majors)
## FOD1P Major Major_Category
## 1 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
## 2 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3 3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics
Describe, in words, what these expressions will match:
(.)\1\1
1. (.) - will match any single character in first group
2. \1 - refers to the match contained in the first parenthesis
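3. \1\1 - the captured character must appear two more times immediately
after it, so the whole expression matches any character repeated three
times in a row (e.g. “aaa”).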
“(.)(.)\2\1”
1. (.)(.) - will match any single character in first group and
second group
2. \2 refers to the match contained in the second parenthesis
3. \1 refers to the match contained in the first parenthesis
4. Overall: two characters followed by the same two characters in reverse
order (e.g. “abba”).
(..)\1
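1. (..) - will match any two characters and capture them as the first group
2. \1 - refers to the match contained in the first parenthesis
3. Overall: a pair of characters immediately repeated (e.g. “abab”).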
“(.).\1.\1”
1. (.) - will match any single character in first group
2. . - match any character
3. \1 - refers to the match contained in the first parenthesis
4. . - match any character
5. \1 - refers to the match contained in the first parenthesis
6. Overall: five characters in which the first, third, and fifth are the
same (e.g. “abaca”).
“(.)(.)(.).*\3\2\1”
1. (.) - will match any single character in first group
2. (.) - will match any single character in second group
3. (.) - will match any single character in third group
4. .* - will match any sequence of characters
5. \3 - refers to the match contained in the third parenthesis
6. \2 - refers to the match contained in the second parenthesis
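7. \1 - refers to the match contained in the first parenthesis
8. Overall: three characters, then any number of characters, then the same
three characters in reverse order (e.g. “abcxyzcba”).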
Construct regular expressions to match words that:
Start and end with the same character.
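“^(.)(.*\1)?$” - ^(.) captures the first character, and \1$ requires the
word to end with that same character; the group is optional so that a
one-letter word also matches. (“(.)\1” alone would only match a doubled
character such as “aa”.)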
Contain a repeated pair of letters (e.g. “church” contains “ch”
repeated twice.)
“(..).*\1”
Contain one letter repeated in at least three places (e.g. “eleven”
contains three “e”s.)
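“(.).*\1.*\1” - the .* between each backreference lets the three
occurrences sit anywhere in the word. (“(.)\1.*\1” would require the first
two occurrences to be adjacent, so it would not match “eleven”.)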