Please deliver links to an R Markdown file (in GitHub and rpubs.com) with solutions to problems 3 and 4 from chapter 8 of Automated Data Collection in R. Problem 9 is extra credit.

library(stringr)

# Raw phone-book text: phone numbers and names run together with no delimiter.
# The pieces are concatenated back into the single original string.
raw.data <- paste0(
  "555-1239Moe Szyslak",
  "(636) 555-0113Burns, C. Montgomery",
  "555-6542Rev. Timothy Lovejoy",
  "555 8904Ned Flanders",
  "636-555-3226Simpson, Homer",
  "555-3642Dr. Julius Hibbert"
)
print(raw.data)
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer555-3642Dr. Julius Hibbert"
# Names: every run of two or more letters, periods, commas, or spaces.
# Digits, hyphens, and parentheses are excluded, so the phone numbers act as
# the separators between names.
name_matches <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")
names <- unlist(name_matches)
print(names)
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
# Phone numbers: an optional area code, written either as "(636)" or as a bare
# "636", with an optional "-" or " " after it; then the 3-digit exchange, an
# optional separator, and the 4-digit line number.
# The parentheses must appear as a pair, so a stray "(" or ")" next to a
# number is no longer swallowed into the match.
phone_pattern <- "(?:(?:\\(\\d{3}\\)|\\d{3})[- ]?)?\\d{3}[- ]?\\d{4}"
phone <- unlist(str_extract_all(raw.data, phone_pattern))
phone
## [1] "555-1239"       "(636) 555-0113" "555-6542"       "555 8904"      
## [5] "636-555-3226"   "555-3642"
# Pair each name with its phone number. Both vectors come from raw.data in
# order of appearance, so element i of one belongs to element i of the other.
# Guard against a mismatch: data.frame() would silently recycle the shorter
# vector whenever the lengths happen to divide evenly.
stopifnot(length(names) == length(phone))
# stringsAsFactors = FALSE keeps both columns as character on R < 4.0 as well.
df <- data.frame(names = names, phone = phone, stringsAsFactors = FALSE)
df
##                  names          phone
## 1          Moe Szyslak       555-1239
## 2 Burns, C. Montgomery (636) 555-0113
## 3 Rev. Timothy Lovejoy       555-6542
## 4         Ned Flanders       555 8904
## 5       Simpson, Homer   636-555-3226
## 6   Dr. Julius Hibbert       555-3642
  1. Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.
# Extract the names as they appear in the raw text. `names` is left untouched
# because the title and second-name checks further down inspect the originals.
names <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))

# Rearrange into "first_name last_name":
#   1. drop a leading title such as "Rev. " or "Dr. "
#      (two or more letters followed by a period),
#   2. drop a middle initial such as "C. " (single capital plus period),
#   3. flip "Last, First" into "First Last".
no_title <- str_replace(names, "^[[:alpha:]]{2,}\\. ", "")
no_initial <- str_replace(no_title, "\\b[A-Z]\\. ", "")
standard_names <- str_replace(no_initial, "^(\\w+), (.+)$", "\\2 \\1")
standard_names
## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy"  "Ned Flanders"
## [5] "Homer Simpson"    "Julius Hibbert"
  1. Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# TRUE where the name carries a title. The dots are escaped so they match a
# literal period; an unescaped "." matches any character and would also flag
# names such as "Drew" or "Reva".
title <- str_detect(names, "Rev\\.|Dr\\.")
title
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE
  1. Construct a logical vector indicating whether a character has a second name.
# A second name shows up as an initial: a space, one capital letter, a period.
initial_pattern <- " [A-Z]\\."
second_name <- str_detect(string = names, pattern = initial_pattern)
print(second_name)
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE
  1. Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
  1. [0-9]+\$ Matches one or more digits immediately followed by a literal dollar sign ($).
# The letters are skipped; only the digit run ending in "$" is returned.
print(str_extract(string = "vbftghr234568$", pattern = "[0-9]+\\$"))
## [1] "234568$"
  1. \b[a-z]{1,4}\b Matches a whole word (delimited by word boundaries) made up of one to four lowercase letters.
# Three lowercase words of 4, 2, and 1 letters match; the digit-only string
# contains no lowercase letters and yields NA.
string <- c("rtyu", "ty", "r", "1234")
print(str_extract(string = string, pattern = "\\b[a-z]{1,4}\\b"))
## [1] "rtyu" "ty"   "r"    NA
  1. .*?\.txt$ Matches any string that ends with the literal extension ".txt", including the bare string ".txt" itself.
# Only strings ending in ".txt" are returned; "xyz.csv" yields NA.
string <- c("str.txt", "xyz.csv", ".txt")
print(str_extract(string = string, pattern = ".*?\\.txt$"))
## [1] "str.txt" NA        ".txt"
  1. \d{2}/\d{2}/\d{4} Matches a date written as two digits, a slash, two digits, a slash, and four digits (e.g. dd/mm/yyyy or mm/dd/yyyy).
# Slashes and two-digit day/month fields are both required, so only the last
# candidate matches.
string <- c("09122019", "9/12/2019", "09/12/2019")
print(str_extract(string = string, pattern = "\\d{2}/\\d{2}/\\d{4}"))
## [1] NA           NA           "09/12/2019"
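  1. <(.+?)>.+?</\1> Matches an HTML/XML element: an opening tag, at least one character of content, and the matching closing tag (the backreference \1 repeats the captured tag name).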
# Only the first candidate has a closing tag that repeats the opening tag
# name; the other two lack a proper "</title>" and yield NA.
string <- c(
  "<title>document</title>",
  "<title>document<title>",
  "<title>document"
)
print(str_extract(string = string, pattern = "<(.+?)>.+?</\\1>"))
## [1] "<title>document</title>" NA                       
## [3] NA

BONUS Question (9)

The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacollection.com.

# Encoded message from the exercise. The four pieces are joined with newlines,
# reproducing the line breaks of the original multi-line string.
raw <- paste(
  "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo",
  "Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO",
  "d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5",
  "fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr",
  sep = "\n"
)

Modifying the vector

# Keep only the revealing characters: upper-case letters, "." and "!".
revealing <- unlist(str_extract_all(raw, "[[:upper:].!]"))
# Each element is a single character, so one replacement per element turns
# every "." into a space.
separate <- str_replace(revealing, "[.]", " ")
# Glue the characters back together into one string.
separate_adv <- str_c(separate, collapse = "")

Displaying Final Result

# Show the decoded message without surrounding quotation marks.
print(separate_adv, quote = FALSE)
## [1] CONGRATULATIONS YOU ARE A SUPERNERD!

END