# ---
# title: "Data 607 Week 3 Assignment"
# author: "Bryan Persaud"
# date: "9/15/2019"
# output:
#   pdf_document: default
#   html_document: default
# ---
library(stringr)
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# 1.
# Extract every run of two or more letters, periods, commas, or spaces.
# Digits, hyphens, and parentheses are not in the character class, and the
# lone spaces inside phone numbers are too short to satisfy {2,}, so only
# the names are returned.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Rewrite "last_name, first_name" as "first_name last_name".
#
# x: character vector of names.
# Returns a character vector of the same length. Surrounding whitespace is
# trimmed; strings without a comma are otherwise returned unchanged.
flip_last_first <- function(x) {
  str_replace(str_trim(x), "^([^,]+?)\\s*,\\s*(.+)$", "\\2 \\1")
}

# A comma marks a name stored as "last_name, first_name". Detecting it avoids
# depending on where each name happens to sit in the extracted vector.
is_reversed <- str_detect(name, ",")

# Separate the names that are already in first_name last_name order
good_names <- name[!is_reversed]
good_names
## [1] "Moe Szyslak" "Rev. Timothy Lovejoy" "Ned Flanders"
## [4] "Dr. Julius Hibbert"

# Switch the remaining names to first_name last_name
fixed_names <- flip_last_first(name[is_reversed])
fixed_names
## [1] "C. Montgomery Burns" "Homer Simpson"

# Combine the names that were already good with the ones that were fixed
The_Simpsons <- c(good_names, fixed_names)
The_Simpsons
## [1] "Moe Szyslak" "Rev. Timothy Lovejoy" "Ned Flanders"
## [4] "Dr. Julius Hibbert" "C. Montgomery Burns" "Homer Simpson"
# 2.
The_Simpsons
## [1] "Moe Szyslak" "Rev. Timothy Lovejoy" "Ned Flanders"
## [4] "Dr. Julius Hibbert" "C. Montgomery Burns" "Homer Simpson"
# Flag each name that carries a title: two or more letters directly followed
# by a period (e.g. "Rev." or "Dr."). A single initial such as "C." is too
# short to satisfy {2,}.
Check_for_Title <- str_detect(
  string = The_Simpsons,
  pattern = "[[:alpha:]]{2,}\\."
)
Check_for_Title
## [1] FALSE TRUE FALSE TRUE FALSE FALSE
# 3.
The_Simpsons
## [1] "Moe Szyslak" "Rev. Timothy Lovejoy" "Ned Flanders"
## [4] "Dr. Julius Hibbert" "C. Montgomery Burns" "Homer Simpson"
# Flag each name that contains a second name, written here as a single capital
# initial followed by a period (e.g. "C."). The leading \\b requires the
# capital to start a word, so the last letter of an all-caps abbreviation
# such as "DR." is not mistaken for an initial.
Check_for_Secondary_Name <- str_detect(The_Simpsons, "\\b[A-Z]\\.")
Check_for_Secondary_Name
## [1] FALSE FALSE FALSE FALSE TRUE FALSE
# 1.
# [0-9]+\\$ matches one or more digits immediately followed by a literal
# dollar sign; the backslashes escape $ so it is not read as an end anchor.
# The vector below mixes strings that contain such a match with ones that
# do not.
Example1_vector <- c(
  "123456789$",
  "123456789",
  "Hello World",
  "1$",
  "789$"
)
Example1 <- str_extract(string = Example1_vector, pattern = "[0-9]+\\$")
Example1
## [1] "123456789$" NA NA "1$" "789$"
# 2.
# \\b[a-z]{1,4}\\b matches a whole word made of one to four lowercase
# letters. The \\b word boundaries on both sides mean a lowercase run inside
# a longer word does not count.
# The vector below mixes strings that contain such a word with ones that
# do not.
Example2_vector <- c(
  "DATA",
  "data",
  "0000",
  "Example two",
  "Simpsons"
)
Example2 <- str_extract(
  string = Example2_vector,
  pattern = "\\b[a-z]{1,4}\\b"
)
Example2
## [1] NA "data" NA "two" NA
# 3.
# .*?\\.txt$ matches a string that ends in ".txt": .*? lazily takes whatever
# precedes the extension, \\. is a literal period, and $ pins the match to
# the end of the string.
# The vector below mixes strings that end in .txt with ones that do not.
Example3_vector <- c(
  "example14.txt",
  "intro to r.txt",
  "intro to r.pdf",
  "123456789.txt",
  "vector"
)
Example3 <- str_extract(string = Example3_vector, pattern = ".*?\\.txt$")
Example3
## [1] "example14.txt" "intro to r.txt" NA "123456789.txt"
## [5] NA
# 4.
# \\d{2}/\\d{2}/\\d{4} matches two digits, a slash, two more digits, another
# slash, and then four digits, i.e. text laid out like ##/##/####. It checks
# only the layout, not whether the digits form a real calendar date.
# The vector below mixes strings in that layout with ones that are not.
Example4_vector <- c(
  "12/21/2012",
  "ab/cd/efgh",
  "9/15/2019",
  "09/15/2019",
  "07/04/1776"
)
Example4 <- str_extract(
  string = Example4_vector,
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
Example4
## [1] "12/21/2012" NA NA "09/15/2019" "07/04/1776"
# 5.
# <(.+?)>.+?</\\1> matches an opening tag <name>, at least one character of
# content, and then a closing tag </name> with the same name. The
# backreference \\1 repeats whatever the first group captured, so the closing
# tag must agree with the opening one.
tag_pattern <- "<(.+?)>.+?</\\1>"
# The vector below mixes properly closed tags with ones that lack a
# matching </...> closing tag.
Example5_vector <- c(
  "<hw3> Character manipulation </hw3>",
  "<1234> MySQL<1234>",
  "<another> Example <another>",
  "<title> How to code using R </title>"
)
Example5 <- str_extract(Example5_vector, tag_pattern)
Example5
## [1] "<hw3> Character manipulation </hw3>"
## [2] NA
## [3] NA
## [4] "<title> How to code using R </title>"
# The encoded text is stored as four lines joined by newline characters.
Secret_Message <- paste(
  "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo",
  "Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO",
  "d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5",
  "fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr",
  sep = "\n"
)
Secret_Message
## [1] "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo\nUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO\nd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5\nfy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# The hidden message is spelled out by the uppercase letters, so keep only
# those and discard the lowercase letters, digits, and punctuation.
str_extract_all(string = Secret_Message, pattern = "[[:upper:]]")
## [[1]]
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"