1. Copy the introductory example. The vector name stores the extracted names.
library(stringr)
## Warning: package 'stringr' was built under R version 3.5.1
# Assemble the raw string with paste0() so that wrapping the literal across
# source lines does not embed newline characters in the data. An embedded
# newline would split "Simpson, Homer" into two separate matches.
raw.data <- paste0(
  "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555",
  "-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, ",
  "Homer5553642Dr. Julius Hibbert"
)
# A name is any run of two or more letters, periods, commas or spaces.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
  1. Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name. Answer:
# [[:alpha:]] is used instead of [A-z]: the range A-z also covers the
# punctuation characters that sit between "Z" and "a" in ASCII.
# Step 1: drop a middle initial such as " C. " (space, letter, period, space).
name2 <- sub(" [[:alpha:]]\\. ", " ", name)
# Step 2: turn "last, first" into "first last".
name3 <- sub("(\\w+),\\s(\\w+)", "\\2 \\1", name2)
# Step 3: strip a leading two- or three-letter title such as "Dr. " or "Rev. ".
name4 <- sub("^[[:alpha:]]{2,3}\\. ", "", name3)
name4
## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders"     "Homer Simpson"    "Julius Hibbert"
  1. Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# A title is two or three letters followed by a period at the start of the name.
has_title <- str_detect(name, "^[[:alpha:]]{2,3}[.]")
df <- data.frame(name, has_title)
df
##                   name has_title
## 1          Moe Szyslak     FALSE
## 2 Burns, C. Montgomery     FALSE
## 3 Rev. Timothy Lovejoy      TRUE
## 4         Ned Flanders     FALSE
## 5       Simpson, Homer     FALSE
## 6   Dr. Julius Hibbert      TRUE
  1. Construct a logical vector indicating whether a character has a second name.
# A second name shows up as a single-letter initial: space, letter, period, space.
has_second_name <- str_detect(name, " [[:alpha:]]\\. ")
df <- data.frame(name, has_second_name)
df
##                   name has_second_name
## 1          Moe Szyslak           FALSE
## 2 Burns, C. Montgomery            TRUE
## 3 Rev. Timothy Lovejoy           FALSE
## 4         Ned Flanders           FALSE
## 5       Simpson, Homer           FALSE
## 6   Dr. Julius Hibbert           FALSE
  1. Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
  1. [0-9]+\$ Answer:
# Matches one or more digits immediately followed by a literal dollar sign.
output <- "0123456789$"
str_extract_all(output, "[0-9]+\\$")[[1]]
## [1] "0123456789$"
  1. \b[a-z]{1,4}\b Answer:
# Matches whole words made up of one to four lower-case letters (a-z); the
# \b anchors on both sides keep longer words such as "three" from matching.
output <- "one two three four five six seven eight nine ten"
str_extract_all(output, "\\b[a-z]{1,4}\\b")[[1]]
## [1] "one"  "two"  "four" "five" "six"  "nine" "ten"
  1. .*?\.txt$ Answer:
# Matches any string that ends in ".txt": an arbitrary (possibly empty) run of
# characters followed by a literal ".txt" at the very end of the string.
output <- "characters,  spaces, filename: example.txt"
str_extract_all(output, ".*?\\.txt$")[[1]]
## [1] "characters,  spaces, filename: example.txt"
  1. \d{2}/\d{2}/\d{4} Answer:
# Matches a date-like token: two digits, a slash, two digits, a slash and four
# digits. Only the shape is checked, so impossible dates match as well.
output <- "51/21/4721 2567/771/13"
str_extract_all(output, "\\d{2}/\\d{2}/\\d{4}")[[1]]
## [1] "51/21/4721"
  1. <(.+?)>.+?</\1>
# Matches an opening tag, at least one character of content, and a closing tag
# whose name repeats the opening tag's (the \1 backreference to the group).
output <- "<!DOCTYPE html><html><body>Hello Assignment3</body></html></html>"
str_extract_all(output, "<(.+?)>.+?</\\1>")[[1]]
## [1] "<html><body>Hello Assignment3</body></html>"

BONUS QUESTION

  1. The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacollection.com.

clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr

Answer:

# The hidden text is carried by the upper-case letters. A "." marks a word
# break and the "!" closes the message, so both are extracted with the letters.
secret_message <- c(
  "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo",
  "Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO",
  "d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5",
  "fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
)
message <- unlist(str_extract_all(secret_message, "[[:upper:].!]+"))
# Join the fragments into one string, then turn the "." separators into spaces.
message <- str_replace_all(paste(message, collapse = ""), fixed("."), " ")
message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"