Question 3

# Raw phone-book text: phone numbers and names run together in one string.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
print(raw.data)
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# Rearrange the vector using the tools from Chapter 11 so that all the
# elements conform to the standard "first_name last_name" format.
# Step 1: extract the names, i.e. every run of two or more letters, dots,
# commas, or spaces. raw.data is a single string, so the first (and only)
# element of the list returned by str_extract_all() holds all the matches.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
print(name)
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
# Step 2: normalise every name to "first_name last_name".
# Drop the first "<word>. " token, which is either a leading title ("Rev. ",
# "Dr. ") or a middle initial ("C. "). It is replaced with "" rather than " "
# so that titled names do not end up with a stray leading blank.
without_prefix <- sub("[[:alnum:]_]+\\. ", "", name)
# Flip "last, first" into "first last". Names without a comma do not match
# the pattern and are returned unchanged.
first_last <- sub(
  "([[:alnum:]_]+), +([[:alnum:]_]+)", "\\2 \\1", without_prefix
)
first_last
## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders"     "Homer Simpson"    "Julius Hibbert"
# Logical vector indicating whether each character has a title.
# A title is a leading abbreviation of at least two letters followed by a dot
# ("Rev.", "Dr."). Requiring two letters keeps a leading initial such as "C."
# from being mistaken for a title.
title_vector <- str_detect(name, "^[[:alpha:]]{2,}\\.")
name_title <- data.frame(name, title_vector)
name_title
##                   name title_vector
## 1          Moe Szyslak        FALSE
## 2 Burns, C. Montgomery        FALSE
## 3 Rev. Timothy Lovejoy         TRUE
## 4         Ned Flanders        FALSE
## 5       Simpson, Homer        FALSE
## 6   Dr. Julius Hibbert         TRUE
# Logical vector indicating whether each character has a second name:
# TRUE when the name contains an abbreviated word (e.g. "C.") with a space
# on both sides.
second_name <- str_detect(string = name, pattern = " [[:alnum:]_]{1,}\\. ")
is_second <- data.frame(name = name, second_name = second_name)
print(is_second)
##                   name second_name
## 1          Moe Szyslak       FALSE
## 2 Burns, C. Montgomery        TRUE
## 3 Rev. Timothy Lovejoy       FALSE
## 4         Ned Flanders       FALSE
## 5       Simpson, Homer       FALSE
## 6   Dr. Julius Hibbert       FALSE

Question 4

Describe the type of strings each regular expression matches and construct an example
(1) [0-9]+$

One or more digits (0 to 9) at the very end of the string. The unescaped $ is the end-of-string anchor, not a literal dollar sign.

# `$` is left unescaped so that it acts as the end-of-string anchor, as in
# the regex under discussion; the trailing run of digits is returned.
str_extract("ewqee3121", "[0-9]+$")
## [1] "3121"
(2) \b[a-z]{1,4}\b

A whole word made up of 1 to 4 lowercase letters, delimited on both sides by word boundaries (\b).

# Sample inputs shared by the remaining examples of this question.
strings <- c(
  "abcde", "abcd", "ab", "1234", "abc", "ABcd",
  "12/03/2010", "10/27/1982",
  "text.txt", "data.csv", "essay.docx",
  "<header>My Web Site</header>"
)
# Flatten the per-element matches into a single character vector.
unlist(str_extract_all(string = strings, pattern = "\\b[a-z]{1,4}\\b"))
## [1] "abcd" "ab"   "abc"  "text" "txt"  "data" "csv"  "docx"
(3) .*?\.txt$

A string that ends with the literal extension .txt; the lazy .*? matches whatever precedes it.

# Only the element ending in ".txt" yields a match.
unlist(str_extract_all(string = strings, pattern = ".*?\\.txt$"))
## [1] "text.txt"
(4) \d{2}/\d{2}/\d{4}

A date-like string of digits in the format xx/xx/xxxx: two digits, a slash, two digits, a slash, and four digits.

# Only the two date-formatted elements yield a match.
unlist(str_extract_all(string = strings, pattern = "\\d{2}/\\d{2}/\\d{4}"))
## [1] "12/03/2010" "10/27/1982"
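(5) <(.+?)>.+?</\1>

A string that contains a matching pair of opening and closing tags, as in HTML. The backreference \1 requires the closing tag name to be the same as the opening one. Ex: <header>My Web Site</header>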

# Only the element wrapped in a matching tag pair yields a match.
unlist(str_extract_all(string = strings, pattern = "<(.+?)>.+?</\\1>"))
## [1] "<header>My Web Site</header>"

Question 9

secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Collapse every match of `pattern` in `text` into one string.
collapse_matches <- function(text, pattern) {
  paste(unlist(str_extract_all(text, pattern)), collapse = "")
}

# Split the message by character type. Each class is a plain character set:
# the letters or digits plus the literal dot, which is kept because it
# separates the words of the hidden message.
uppercase <- collapse_matches(secret_message, "[A-Z.]")
lowercase <- collapse_matches(secret_message, "[a-z.]")
numbers <- collapse_matches(secret_message, "[0-9.]")
uppercase
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD"

Taken together, the uppercase letters form a meaningful statement.

# For comparison: the lowercase letters and the digits (each with the dots).
print(lowercase)
## [1] "clcopowzmstcdwnkigvdicpuggvhrynjuwczihqrfpxsjdwpnanwowisdijjkpfdrcocbtyczjataootjtjnecfek.rwwwojigdvrfrbz.bknbhzgvizcrop.wgnb.qofaotfbwmktszqefyndtkcfgmcgxonhkgr"
print(numbers)
## [1] "1087792855078035307553364.1162.24905..651724639589659490545"

Decoded secret message

# Turn the dots that separate the words into spaces.
decoded_message <- str_replace_all(
  string = uppercase,
  pattern = "[.]",
  replacement = " "
)
print(decoded_message)
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"