#load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Part 1: Provide an example of at least three dataframes in R that demonstrate normalization. The dataframes can contain any data, either real or synthetic. Although normalization is typically done in SQL and relational databases, you are expected to show this example in R, as it is our main work environment in this course.
# Denormalized grocery table: each product's attributes are repeated on every
# row in which that product appears, which is the redundancy that the
# normalized tables further down are meant to remove.
grocerylist_denormalized <- local({
  # Number of repeated rows per product, in product order (IDs 1 through 5).
  copies <- c(3, 2, 2, 1, 1)
  data.frame(
    productID = rep(c(1, 2, 3, 4, 5), times = copies),
    productName = rep(
      c("Lemon", "Sweet Potatoes", "Lime", "Steak", "Chicken"),
      times = copies
    ),
    productCost = rep(c(0.10, 0.50, 0.15, 8.50, 4.50), times = copies),
    productSellPrice = rep(c(0.50, 2.00, 0.35, 18.20, 9.97), times = copies),
    CategoryID = rep(c(1, 2, 1, 4, 3), times = copies)
  )
})
print(grocerylist_denormalized)
## productID productName productCost productSellPrice CategoryID
## 1 1 Lemon 0.10 0.50 1
## 2 1 Lemon 0.10 0.50 1
## 3 1 Lemon 0.10 0.50 1
## 4 2 Sweet Potatoes 0.50 2.00 2
## 5 2 Sweet Potatoes 0.50 2.00 2
## 6 3 Lime 0.15 0.35 1
## 7 3 Lime 0.15 0.35 1
## 8 4 Steak 8.50 18.20 4
## 9 5 Chicken 4.50 9.97 3
# Category lookup table: one row per category, keyed by CategoryID.
groceryCatgories_normalized <- local({
  category_names <- c("Fruit", "Vegetable", "Poultry", "Red Meat")
  data.frame(
    # IDs are stored as doubles (1, 2, 3, 4), numbered in the order listed.
    CategoryID = as.numeric(seq_along(category_names)),
    CategoryName = category_names
  )
})
print(groceryCatgories_normalized)
## CategoryID CategoryName
## 1 1 Fruit
## 2 2 Vegetable
## 3 3 Poultry
## 4 4 Red Meat
# Product lookup table: one row per distinct (productID, productName) pair.
# Whole rows are de-duplicated instead of calling unique() on each column
# separately, so every ID stays paired with its own name and the columns
# cannot end up with different lengths if two products share a name.
Product <- unique(grocerylist_denormalized[, c("productID", "productName")])
names(Product) <- c("ProductID", "ProductName")
# unique() keeps the row names of the first occurrences; renumber them 1..n.
rownames(Product) <- NULL
print(Product)
## ProductID ProductName
## 1 1 Lemon
## 2 2 Sweet Potatoes
## 3 3 Lime
## 4 4 Steak
## 5 5 Chicken
# Product details table: one row per distinct product with its sell price.
# Whole rows are de-duplicated so that ID, name and price stay aligned; calling
# unique() on each column separately would shorten the price column (and
# misalign or break the table) as soon as two products share a sell price.
ProductInfo <- unique(
  grocerylist_denormalized[, c("productID", "productName", "productSellPrice")]
)
names(ProductInfo) <- c("ProductID", "ProductName", "ProductPrice")
# unique() keeps the row names of the first occurrences; renumber them 1..n.
rownames(ProductInfo) <- NULL
print(ProductInfo)
## ProductID ProductName ProductPrice
## 1 1 Lemon 0.50
## 2 2 Sweet Potatoes 2.00
## 3 3 Lime 0.35
## 4 4 Steak 18.20
## 5 5 Chicken 9.97
#Part 2 Character Manipulation: using original data file from github
# Read FiveThirtyEight's college majors list straight from GitHub.
majors_list <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/refs/heads/master/college-majors/majors-list.csv"
)
# View() opens an interactive data viewer, so only call it when a person is
# actually at the console; this keeps the script runnable non-interactively.
if (interactive()) {
  View(majors_list)
}
# Majors whose name contains "DATA" or "STATISTICS" (the match is
# case-sensitive). Built in one step as a single-column data frame so the
# name is never bound to a bare vector first.
datastats_majors <- data.frame(
  datastats_majors = majors_list$Major[
    str_detect(majors_list$Major, "DATA|STATISTICS")
  ]
)
if (interactive()) {
  View(datastats_majors)
}
#Part 3 Describe, in words, what these expressions will match:
(.)\1\1 : This expression will match the same character appearing three times in a row. For example: “aaa” or “rrr”
# One captured character followed by two back-references to it.
expression <- "(.)\\1\\1"
expression %>% str_view()
# Append three hand-made strings so the pattern has something to match.
append(words, c("aaa", "rrr", "444")) %>% str_view(expression)
## [981] │ <aaa>
## [982] │ <rrr>
## [983] │ <444>
“(.)(.)\2\1” : This expression will match a pair of characters followed by the same pair of characters reversed. For example: “bccb” or “effe”
# Two captured characters followed by the same two in reverse order.
expression <- "(.)(.)\\2\\1"
expression %>% str_view()
words %>% str_view(expression)
## [19] │ after<noon>
## [43] │ <appa>rent
## [53] │ <arra>nge
## [107] │ b<otto>m
## [112] │ br<illi>ant
## [174] │ c<ommo>n
## [230] │ d<iffi>cult
## [259] │ <effe>ct
## [329] │ f<ollo>w
## [422] │ in<deed>
## [470] │ l<ette>r
## [521] │ m<illi>on
## [581] │ <oppo>rtunity
## [582] │ <oppo>se
## [877] │ tom<orro>w
(..)\1 : This expression will match any two characters that are immediately repeated, i.e. the same two-character sequence twice in a row. For example: “b2b2” or “c3c3”
# A captured two-character sequence immediately followed by itself.
expression <- "(..)\\1"
expression %>% str_view()
words %>% str_view(expression)
## [696] │ r<emem>ber
“(.).\1.\1” : This expression will match a character followed by any character, then the original character, then any character, and then the original character again. For example: “abaca” or “b3b.b”
# A captured character that reappears at every second position: X?X?X.
expression <- "(.).\\1.\\1"
expression %>% str_view()
words %>% str_view(expression)
## [265] │ <eleve>n
“(.)(.)(.).*\3\2\1” : This expression will match three characters, followed by zero or more characters of any kind, followed by the same three characters in reverse order. For example: “abccba” or “abc1cba”
# Three captured characters, any run of characters, then the three reversed.
expression <- "(.)(.)(.).*\\3\\2\\1"
expression %>% str_view()
words %>% str_view(expression)
## [598] │ <paragrap>h
#Part 4 Construct regular expressions to match words that:
# Words that start and end with the same character.
# The "(.*\\1)?" group is optional so that a one-letter word such as "a",
# whose first and last character are the same character, also matches;
# "^(.).*\\1$" would require at least two characters and skip it.
expression <- "^(.)(.*\\1)?$"
str_view(expression)
## [1] │ ^(.)(.*\1)?$
str_view(words, expression)
## [1] │ <a>
## [36] │ <america>
## [49] │ <area>
## [209] │ <dad>
## [213] │ <dead>
## [223] │ <depend>
## [258] │ <educate>
## [266] │ <else>
## [268] │ <encourage>
## [270] │ <engine>
## [278] │ <europe>
## [283] │ <evidence>
## [285] │ <example>
## [287] │ <excuse>
## [288] │ <exercise>
## [291] │ <expense>
## [292] │ <experience>
## [296] │ <eye>
## [386] │ <health>
## [394] │ <high>
## ... and 17 more
# Words containing a two-character sequence that occurs again later on.
expression <- "(..).*\\1"
expression %>% str_view()
words %>% str_view(expression)
## [48] │ ap<propr>iate
## [152] │ <church>
## [181] │ c<ondition>
## [217] │ <decide>
## [275] │ <environmen>t
## [487] │ l<ondon>
## [598] │ pa<ragra>ph
## [603] │ p<articular>
## [617] │ <photograph>
## [638] │ p<repare>
## [641] │ p<ressure>
## [696] │ r<emem>ber
## [698] │ <repre>sent
## [699] │ <require>
## [739] │ <sense>
## [858] │ the<refore>
## [903] │ u<nderstand>
## [946] │ w<hethe>r
# Words in which a single character occurs in at least three places.
expression <- "(.).*\\1.*\\1"
expression %>% str_view()
words %>% str_view(expression)
## [48] │ a<pprop>riate
## [62] │ <availa>ble
## [86] │ b<elieve>
## [90] │ b<etwee>n
## [119] │ bu<siness>
## [221] │ d<egree>
## [229] │ diff<erence>
## [233] │ di<scuss>
## [265] │ <eleve>n
## [275] │ e<nvironmen>t
## [283] │ <evidence>
## [288] │ <exercise>
## [291] │ <expense>
## [292] │ <experience>
## [423] │ <indivi>dual
## [598] │ p<aragra>ph
## [684] │ r<eceive>
## [696] │ r<emembe>r
## [698] │ r<eprese>nt
## [845] │ t<elephone>
## ... and 2 more