library (stringr)
## Raw phone-book text: phone numbers and character names run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
## A name is any run of two or more letters, periods, commas, or spaces.
## The input is a single string, so the first list element holds every match.
simpsons_names <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
simpsons_names
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
## Split every entry of simpsons_names into a first and a last name.
## Each step below returns exactly one value per input element, so the two
## name vectors always have the same length and line up row by row in the
## data frame (flattening all matches with unlist() would not guarantee that).

## Drop a leading title: two or more letters followed by a period and
## whitespace ("Rev. ", "Dr. "). A single-letter initial such as "C." is kept.
names_no_title <- str_replace(simpsons_names, "^[[:alpha:]]{2,}\\.\\s+", "")
## Flip "Last, First ..." into "First ... Last"; entries without a comma are
## left unchanged.
names_standard <- str_replace(names_no_title, "^(\\w+),\\s*(.+)$", "\\2 \\1")
## First name: the leading word of the standardised entry.
first_name_all <- str_extract(names_standard, "^\\w+")
first_name_all
## [1] "Moe" "C" "Timothy" "Ned" "Homer" "Julius"
## Last name: the trailing word of the standardised entry.
last_name_all <- str_extract(names_standard, "\\w+$")
last_name_all
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
## Column names are taken from the variable names.
Simpsons_TV <- data.frame(first_name_all, last_name_all)
Simpsons_TV
## first_name_all last_name_all
## 1 Moe Szyslak
## 2 C Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
## logic check for title: a title is two or more letters followed by a period
## ("Rev.", "Dr."). A single-letter initial such as "C." does not qualify, so
## no separate comma test is needed to rule out "Burns, C. Montgomery".
has_title <- str_detect(simpsons_names, "[[:alpha:]]{2,}\\.")
has_title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
## Names that carry a title, selected with the logical vector above.
partb_name <- simpsons_names[has_title]
partb_name
## [1] "Rev. Timothy Lovejoy" "Dr. Julius Hibbert"
## logic check for second name: whitespace, a single word character, a period,
## whitespace, then another word (e.g. " C. Montgomery").
str_detect(simpsons_names, "\\s\\w[.]\\s\\w+")
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
print("The second character, C. Montgomery Burns, is the only character with a second name.")
## [1] "The second character, C. Montgomery Burns, is the only character with a second name."
## (a) [0-9]+\$
## Check: TRUE only where one or more digits are immediately followed by a
## literal dollar sign. The pattern is unanchored, so "67$2" also matches.
test_4a <- c(
  "43526$", "8.7", "65802$", "words", "fail$",
  "45%", "0190$", "$67", "67$2"
)
str_detect(string = test_4a, pattern = "[0-9]+\\$")
## [1] TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE
print("A digit followed by dollar sign.")
## [1] "A digit followed by dollar sign."
## (b) \b[a-z]{1,4}\b
## Check: TRUE only where the string contains a stand-alone run of one to four
## lower-case letters, i.e. with a word boundary on each side.
test_4b <- c(
  "kobe", "Bryant", "Retired", "in", "2016",
  "$$%", "xx1$", "thiswillfail", "hello"
)
str_detect(string = test_4b, pattern = "\\b[a-z]{1,4}\\b")
## [1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
print("A lower case word between 1 and 4 characters long.")
## [1] "A lower case word between 1 and 4 characters long."
## (c) .*?\.txt$
## Check: TRUE only for strings that end in ".txt". `.*?` lazily matches zero
## or more leading characters, `\\.` is a literal period, and `$` anchors the
## match to the end of the string.
test_4c <- c("pass.txt", "true.txt", "fail.pdf", "fail2.xlsx", "hello.csv", "data.txt", "xx1$", "thiswillfail", "fast.txt")
str_detect(test_4c, ".*?\\.txt$")
## [1] TRUE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
## The description below was corrected: `*?` means "zero or more, lazily",
## not "at most once".
print("Any string that ends in .txt, preceded by zero or more characters (matched lazily).")
## [1] "Any string that ends in .txt, preceded by zero or more characters (matched lazily)."
## (d) \d{2}/\d{2}/\d{4}
## Check: TRUE only where the string contains two digits, a slash, two digits,
## a slash, and four digits. Only the digit counts are checked, so "99/88/7777"
## passes even though it is not a real calendar date.
test_4d <- c(
  "99/88/7777", "12/25/1911", "27/04/1971", "in",
  "$$%", "02.11.9877", "thiswillfail", "hello.txt"
)
str_detect(string = test_4d, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
print("Two digits/ two digits/ four digits or Date Format")
## [1] "Two digits/ two digits/ four digits or Date Format"
## (e) <(.+?)>.+?</\1>
## Check: TRUE only where the string contains an opening tag, at least one
## character of content, and a closing tag. The backreference `\\1` requires
## the closing tag to repeat the text captured from the opening tag.
test_4e <- c(
  "<a>enclosed</a>",
  "hello.txt"
)
str_detect(string = test_4e, pattern = "<(.+?)>.+?</\\1>")
## [1] TRUE FALSE
print("Any mix of characters enclosed by <></>")
## [1] "Any mix of characters enclosed by <></>"
## Extra credit: decode the message hidden in the scrambled string.
secret_extracredit <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
## Step 1: keep each run of word characters that ends in a punctuation mark;
## anything after the last punctuation mark has no terminator and is dropped.
decode_message <- unlist(str_extract_all(secret_extracredit, "\\w+[[:punct:]]"))
## Step 2: from those runs keep only the upper-case letters and punctuation.
uppercase_message <- unlist(str_extract_all(decode_message, "[[:upper:]]|[[:punct:]]"))
## Step 3: glue the surviving characters together into one string.
secret_code <- paste(uppercase_message, collapse = "")
secret_code
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD!"