Question 3

library(stringr)
# Raw text in which names and phone numbers are run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is a run of letters, periods, commas, and spaces. The {2,} minimum
# discards the lone spaces that occur inside some of the phone numbers.
# raw.data is a single string, so the first list element holds every match.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

1) Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name

# For "Last, First" entries, move the ", First" part in front of the surname.
# The pattern is anchored at the end of the string, so it can match at most
# once per element; entries without a comma are left untouched.
name2 <- str_replace(name, "(.+)(, .+)$", "\\2 \\1")
name2
## [1] "Moe Szyslak"           ", C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"          ", Homer Simpson"       "Dr. Julius Hibbert"
# Drop the ", " that the previous step carried to the front of the
# reordered names.
name3 <- str_remove_all(name2, ", ")
name3
## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"
# Strip titles: a capital letter, one or two lowercase letters, and a literal
# period ("Dr.", "Rev."). Single-letter initials such as "C." do not match
# because at least one lowercase letter is required. The trailing \\s* also
# removes the space after the title, so no element is left with leading
# whitespace.
name4 <- str_replace_all(name3, "[A-Z][a-z]{1,2}\\.\\s*", "")
name4
## [1] "Moe Szyslak"         "C. Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders"        "Homer Simpson"       "Julius Hibbert"

2) Construct a logical vector indicating whether a character has a title (i.e., Rev.ย and Dr.).

# TRUE where the name contains the title "Dr." or "Rev.". The periods are
# escaped so they match a literal "."; an unescaped "." matches any character
# and would also flag names such as "Drew" or "Reva".
titles.name <- str_detect(name, "Dr\\.|Rev\\.")
titles.name
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

3) Construct a logical vector indicating whether a character has a second name.

# A character has a second name when, once any title is removed, the name
# still consists of more than two words (i.e. contains more than one space).
# Removing the title first, rather than excluding titled names outright,
# keeps the test correct for someone who has both a title and a second name.
second.name <- str_count(
  str_replace_all(name, "(Dr|Rev)\\.\\s*", ""),
  " "
) > 1
second.name
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

Question 4

1, [0-9]+\$

# One or more digits immediately followed by a literal dollar sign. The comma
# in "1,587$" interrupts the digit run, so only "587$" is matched.
str_extract_all(
  string = "My tuition at CUNY SPS this semester is US1,587$",
  pattern = "[0-9]+\\$"
)
## [[1]]
## [1] "587$"

2,

# Whole words made of one to four lowercase letters; the word boundaries
# prevent matching inside longer words, and capitalised words are skipped.
str_extract_all(
  string = "My tuition at CUNY SPS this semester is US1,587$",
  pattern = "\\b[a-z]{1,4}\\b"
)
## [[1]]
## [1] "at"   "this" "is"

3, .*?.txt$

# Any string that ends in the literal suffix ".txt"; the match starts at the
# first character, so the whole string is returned.
str_extract_all(
  string = "A file I used was data607.txt",
  pattern = ".*?\\.txt$"
)
## [[1]]
## [1] "A file I used was data607.txt"

4, //

# Two digits, a slash, two digits, a slash, and four digits, e.g. a date
# written as mm/dd/yyyy.
str_extract_all(
  string = "My cat was born on 12/31/2018",
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
## [[1]]
## [1] "12/31/2018"

5, <(.+?)>.+?</>

# An opening tag, some content, and a closing tag; the backreference \\1
# requires the closing tag to repeat the name captured from the opening tag.
str_extract_all(
  string = "A sample of HTML code I learn is <head>TEST_TEST_TEST</head>",
  pattern = "<(.+?)>.+?</\\1>"
)
## [[1]]
## [1] "<head>TEST_TEST_TEST</head>"

Question 9

The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacllection.com

SecretCode <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# The message is carried by the uppercase letters and punctuation marks; the
# lowercase letters and digits are noise. The POSIX classes are written inside
# a bracket expression, which is the documented form.
hiddenmessage <- str_extract_all(SecretCode, "[[:upper:][:punct:]]")
unlist(hiddenmessage)
##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
# Join the characters and turn the periods, which act as word separators,
# into spaces to read the message.
str_replace_all(str_c(unlist(hiddenmessage), collapse = ""), "\\.", " ")
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"