#Assignment 3 by Jose Fuentes
# First part: Provide an example of at least three dataframes in R that demonstrate normalization.

# Initial DataFrame
employees_raw <- data.frame(
  employee_id = c(1, 2, 3, 4),
  employee_name = c("John", "Emma", "Mike", "Sophia"),
  department_name = c("HR", "IT", "IT", "Finance"),
  project_name = c("Recruitment", "Website", "Database", "Budgeting"),
  project_desc = c("Hiring new employees", "Company website revamp", "Database upgrade", "Budget management"),
  stringsAsFactors = FALSE
)

print(employees_raw)
##   employee_id employee_name department_name project_name           project_desc
## 1           1          John              HR  Recruitment   Hiring new employees
## 2           2          Emma              IT      Website Company website revamp
## 3           3          Mike              IT     Database       Database upgrade
## 4           4        Sophia         Finance    Budgeting      Budget management
# Normalized Employee DataFrame
employees <- data.frame(
  employee_id = c(1, 2, 3, 4),
  employee_name = c("John", "Emma", "Mike", "Sophia"),
  department_id = c(1, 2, 2, 3),
  stringsAsFactors = FALSE
)

print(employees)
##   employee_id employee_name department_id
## 1           1          John             1
## 2           2          Emma             2
## 3           3          Mike             2
## 4           4        Sophia             3
# Normalized Department DataFrame
departments <- data.frame(
  department_id = c(1, 2, 3),
  department_name = c("HR", "IT", "Finance"),
  stringsAsFactors = FALSE
)

print(departments)
##   department_id department_name
## 1             1              HR
## 2             2              IT
## 3             3         Finance
# Normalized Project DataFrame
projects <- data.frame(
  project_name = c("Recruitment", "Website", "Database", "Budgeting"),
  project_desc = c("Hiring new employees", "Company website revamp", "Database upgrade", "Budget management"),
  stringsAsFactors = FALSE
)

print(projects)
##   project_name           project_desc
## 1  Recruitment   Hiring new employees
## 2      Website Company website revamp
## 3     Database       Database upgrade
## 4    Budgeting      Budget management
# Second part: Identify majors containing "DATA" or "STATISTICS"
# Reading the CSV file from the specified path
majors_list <- read.csv("C:/Users/Dell/Downloads/majors-list.csv", stringsAsFactors = FALSE)

# View the first few rows of the data
head(majors_list)
##   FOD1P                                 Major                  Major_Category
## 1  1100                   GENERAL AGRICULTURE Agriculture & Natural Resources
## 2  1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3  1102                AGRICULTURAL ECONOMICS Agriculture & Natural Resources
## 4  1103                       ANIMAL SCIENCES Agriculture & Natural Resources
## 5  1104                          FOOD SCIENCE Agriculture & Natural Resources
## 6  1105            PLANT SCIENCE AND AGRONOMY Agriculture & Natural Resources
# Find majors that contain either "DATA" or "STATISTICS"
majors_with_data_statistics <- majors_list[grep("DATA|STATISTICS", majors_list$Major, ignore.case = TRUE), ]

# Display the result
print(majors_with_data_statistics)
##    FOD1P                                         Major          Major_Category
## 44  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 52  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 59  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics
# Third part: Describe what these expressions will match
# - `(.)\1\1`: Matches the same character repeating three times, like "aaa".
# - `(.)(.)\\2\\1`: Matches a pair of characters followed by their reverse, such as "abba".
# - `(..)\1`: Matches two characters repeated, for instance "a1a1".
# - `(.).\\1.\\1`: Matches a character followed by any, then the same one, another, and the first again, e.g. "abaca".
# - `(.)(.)(.).*\\3\\2\\1`: Matches three characters, any sequence, then the reverse order, like "abc1cba".

# Fourth part: Construct regular expressions to match specific patterns in words
# 1. Words that start and end with the same character.
# 2. Words that contain a repeated pair of letters.
# 3. Words that contain one letter repeated in at least three places.

# Regular expressions:
# 1. Matching Words that Start and End with the Same Character:
# regex: `^(.)[^\\1]*\\1$`

# 2. Finding Repeated Pairs of Letters:
# regex: `([A-Za-z]{2}).*\\1`

# 3. Identifying Letters Repeated at Least Three Times:
# regex: `([a-z]).*\\1.*\\1`