Raw Data

library(stringr)

## Source text: character names interleaved with phone numbers in one string.
raw.data <-"555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
## A name is any run of two or more letters, periods, commas, or spaces.
## The input is a single string, so the match list has exactly one element.
simpsons_names <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
simpsons_names
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

3A

First Name Clean Up

## Step 1: pull the fragment holding the given name -- either a word followed
## by whitespace, or a comma/space, whitespace, and then a word.
first_name_extract <- unlist(
  str_extract_all(simpsons_names, pattern = "\\w+\\s|[, ]\\s\\w+")
)
## Step 2: split on ", " so the separator left by "Last, First" entries
## is separated from the name itself.
first_name_split <- unlist(
  str_split(first_name_extract, pattern = ",[[:blank:]]{1}")
)
## Step 3: keep only runs of word characters; blanks and empty pieces drop out.
first_name_all <- unlist(
  str_extract_all(first_name_split, pattern = "\\w+")
)
first_name_all
## [1] "Moe"     "C"       "Timothy" "Ned"     "Homer"   "Julius"

Last Name Clean Up

## Step 1: match either a non-punctuation character, whitespace, and a word
## anchored at the end of the string, or a word immediately followed by a comma.
last_name_extract <- unlist(
  str_extract_all(
    simpsons_names,
    pattern = "[^[:punct:]]\\s\\w+$|\\w+[,]"
  )
)
## Step 2: break each match apart at single blanks.
last_name_split <- unlist(
  str_split(last_name_extract, pattern = "[[:blank:]]{1}")
)
## Step 3: keep runs of at least two letters, which discards one-character
## leftovers and the trailing comma.
last_name_all <- unlist(
  str_extract_all(last_name_split, pattern = "[[:alpha:]][[:alpha:]]+")
)
last_name_all
## [1] "Szyslak"  "Burns"    "Lovejoy"  "Flanders" "Simpson"  "Hibbert"

Load Character Names into Data Frame

## Combine the two name vectors column-wise; each column keeps the name of
## the vector it was built from.
Simpsons_TV <- data.frame(
  first_name_all = first_name_all,
  last_name_all = last_name_all
)
Simpsons_TV
##   first_name_all last_name_all
## 1            Moe       Szyslak
## 2              C         Burns
## 3        Timothy       Lovejoy
## 4            Ned      Flanders
## 5          Homer       Simpson
## 6         Julius       Hibbert

3B

## Title check: flag entries that contain a period but no comma.
!str_detect(simpsons_names, "[,]") &
  str_detect(simpsons_names, "[.]")
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Two Characters with Title

## Extract entries shaped like "<word>. <word> <word>", i.e. an abbreviated
## title followed by two words.
partb_name <- unlist(
  str_extract_all(simpsons_names, pattern = "\\w+[.]\\s\\w+\\s\\w+")
)
partb_name
## [1] "Rev. Timothy Lovejoy" "Dr. Julius Hibbert"

3C

## Second-name check: look for whitespace, a single word character and a
## period, then whitespace and another word.
str_detect(simpsons_names, pattern = "\\s\\w[.]\\s\\w+")
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE
print("The second character, C. Montgomery Burns, is the only character with a second name.")
## [1] "The second character, C. Montgomery Burns, is the only character with a second name."

4

(a)[0-9]+\$

## Check that only strings containing digits directly followed by a literal
## dollar sign come back TRUE.
test_4a <- c(
  "43526$", "8.7", "65802$", "words", "fail$",
  "45%", "0190$", "$67", "67$2"
)
str_detect(test_4a, pattern = "[0-9]+\\$")
## [1]  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE
print("A digit followed by dollar sign.")
## [1] "A digit followed by dollar sign."

(b)\b[a-z]{1,4}\b

## Check that only strings containing a stand-alone lower-case word of one to
## four letters come back TRUE.
test_4b <- c(
  "kobe", "Bryant", "Retired", "in", "2016",
  "$$%", "xx1$", "thiswillfail", "hello"
)
str_detect(test_4b, pattern = "\\b[a-z]{1,4}\\b")
## [1]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
print("A lower case word between 1 and 4 characters long.")
## [1] "A lower case word between 1 and 4 characters long."

(c).*?\.txt$

## Check that only strings ending in ".txt" come back TRUE.
## In the pattern, `.*?` is a lazy quantifier: it matches zero or more
## characters, as few as possible (it does not mean "at most once"); `\\.`
## is a literal period and `$` anchors the match to the end of the string.
test_4c <- c(
  "pass.txt", "true.txt", "fail.pdf", "fail2.xlsx", "hello.csv",
  "data.txt", "xx1$", "thiswillfail", "fast.txt"
)
str_detect(test_4c, ".*?\\.txt$")
## [1]  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE
print("Any string ending in .txt; the lazy .*? matches zero or more preceding characters, as few as possible.")
## [1] "Any string ending in .txt; the lazy .*? matches zero or more preceding characters, as few as possible."

(d)\d{2}/\d{2}/\d{4}

## Check that only strings containing two digits, a slash, two digits, a
## slash, and four digits come back TRUE.
test_4d <- c(
  "99/88/7777", "12/25/1911", "27/04/1971", "in",
  "$$%", "02.11.9877", "thiswillfail", "hello.txt"
)
str_detect(test_4d, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
print("Two digits/ two digits/ four digits or Date Format")
## [1] "Two digits/ two digits/ four digits or Date Format"

(e)<(.+?)>.+?</\1>

## Check that only text wrapped in an opening tag and a closing tag comes back
## TRUE; `\\1` requires the closing tag name to repeat the captured opening one.
test_4e <- c(
  "<a>enclosed</a>",
  "hello.txt"
)
str_detect(test_4e, pattern = "<(.+?)>.+?</\\1>")
## [1]  TRUE FALSE
print("Any mix of characters enclosed by <></>")
## [1] "Any mix of characters enclosed by <></>"

9

## Scrambled text; the hidden message is spelled by its upper-case letters
## and punctuation marks.
secret_extracredit <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

## Pass 1: keep each run of word characters that ends in a punctuation mark.
decode_message <- unlist(
  str_extract_all(secret_extracredit, "\\w+[[:punct:]]")
)
## Pass 2: from those runs, keep only upper-case letters and punctuation.
uppercase_message <- unlist(
  str_extract_all(decode_message, "[[:upper:]]|[[:punct:]]")
)
## Glue the surviving characters back together into one string.
secret_code <- paste(uppercase_message, collapse = "")
secret_code
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD!"