Please deliver links to an R Markdown file (in GitHub and rpubs.com) with solutions to problems 3 and 4 from chapter 8 of Automated Data Collection in R. Problem 9 is extra credit. You may work in a small group, but please submit separately with names of all group participants in your submission.

Here is the referenced code for the introductory example in #3:

3a) Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.

First, I load the stringr package, define the raw data, and create the ‘name’ vector.

library(stringr)

# Raw text in which names and phone numbers are run together without
# delimiters.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas, or spaces.
# raw.data is a single string, so the first (and only) list element returned
# by str_extract_all() holds every match.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
print(name)

## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

  # Separate the last names.
  # Inner call: the run of letters that ends the string or sits directly before
  # a comma ("Last, First" entries). Outer call: drop the trailing comma, if
  # any, by keeping only the letters.
    last_name <- str_extract(
      str_extract(name, "[[:alpha:]]+($|,)"),
      "[[:alpha:]]+"
    )
    print(last_name)

## [1] "Szyslak"  "Burns"    "Lovejoy"  "Flanders" "Simpson"  "Hibbert"

  # Separate the first names.
  # Expand the abbreviated initial "C." to "Charles". The period is escaped and
  # the match is anchored on a word boundary so that only a literal, standalone
  # "C." is replaced; an unescaped "." would match ANY character after a "C"
  # (e.g. it would turn "Carl" into "Charlesrl").
    first_name <- str_replace(name, "\\bC\\.", "Charles")
  # The first run of letters that is followed by a space is the first name
  # (titles end in "." and "Last," ends in ",", so both are skipped).
    first_name <- str_extract(first_name, "[[:alpha:]]+ ")
  # Entries where nothing matched (e.g. "Simpson, Homer", whose first name is
  # at the end of the string) are NA; for those, take the word that follows a
  # punctuation mark and a whitespace character.
    no_match <- is.na(first_name)
    first_name[no_match] <- str_extract(
      name[no_match],
      "[[:punct:]]\\s[[:alpha:]]+"
    )
  # Strip the surrounding space / punctuation, keeping only the letters.
    first_name <- str_extract(first_name, "[[:alpha:]]+")

  # Make a data frame with one row per person.
    first_last <- data.frame(first_name, last_name)
    first_last

##   first_name last_name
## 1        Moe   Szyslak
## 2    Charles     Burns
## 3    Timothy   Lovejoy
## 4        Ned  Flanders
## 5      Homer   Simpson
## 6     Julius   Hibbert

3b) Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).

# TRUE for entries that contain a two- or three-letter abbreviation ending in
# a period (the titles "Rev." and "Dr." in this data).
str_detect(string = name, pattern = "[[:alpha:]]{2,3}[.]")

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

3c) Construct a logical vector indicating whether a character has a second name.

# Blank out any title, then count the words that remain. An entry with more
# than two words has a second name in addition to its first and last names.
second_name <- str_count(
  str_replace(name, "[[:alpha:]]{2,3}[.]", " "),
  "\\w+"
)
second_name > 2

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

4) Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

[0-9]+\$ Strings that contain one or more digits immediately followed by a literal dollar sign.

# One or more digits followed by a literal "$".
a <- "123456$"
unlist(str_extract_all(string = a, pattern = "[0-9]+\\$"))

## [1] "123456$"

\b[a-z]{1,4}\b

Strings that contain a standalone word of one to four lowercase letters, i.e. a run of 1–4 lowercase letters with a word boundary on each side.

# "egg" and the "weep" in "$weep" are 1-4 letter words; "table" is too long.
b <- c(
  "egg",
  "table",
  "$weep"
)
unlist(str_extract_all(string = b, pattern = "\\b[a-z]{1,4}\\b"))

## [1] "egg"  "weep"

.*?\.txt$

Strings that end in “.txt”.

# Example inputs for the ".txt" pattern. Named `file_names` rather than `c`
# so that the example does not mask base::c().
file_names <- c("egg.txt", "table", "$weep.txt")
# Lazily match any characters up to a literal ".txt" at the end of the string.
unlist(str_extract_all(file_names, ".*?\\.txt$"))

## [1] "egg.txt"   "$weep.txt"

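\d{2}/\d{2}/\d{4} Strings that contain 2 digits, followed by a forward slash, followed by 2 digits, followed by a forward slash, followed by 4 digits (a date-like pattern such as 04/19/1995). The pattern is not anchored, so the match may occur anywhere in the string.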

# Only the two date-like strings contain the dd/dd/dddd pattern.
d <- c(
  "egg.txt",
  "04/19/1995",
  "$weep.txt",
  "02/28/1992"
)
unlist(str_extract_all(string = d, pattern = "\\d{2}/\\d{2}/\\d{4}"))

## [1] "04/19/1995" "02/28/1992"

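<(.+?)>.+?</\1> Strings that contain an HTML-style pair of tags: “<”, followed by one or more characters (the tag name, captured as group 1), followed by “>”, followed by one or more characters of content, followed by “</”, followed by the same tag name again (the backreference \1), followed by “>”.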

# The first two strings have matching opening and closing tags; "<.txt>" has
# no closing tag and so is not matched.
e <- c(
  "<b>egg.txt</b>",
  "<0>4/19/1995</0>",
  "<.txt>"
)
unlist(str_extract_all(string = e, pattern = "<(.+?)>.+?</\\1>"))

## [1] "<b>egg.txt</b>"   "<0>4/19/1995</0>"

9) The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others!

# The hidden message is spelled out by the uppercase letters of the scrambled
# text; "." separates the words and "!" closes the message.
m <- c("<clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr")
# Keep every run of uppercase letters, periods, and exclamation marks. The "!"
# is included so the final character of the message is not dropped.
n <- unlist(str_extract_all(m, "[[:upper:].!]+"))
# Glue the extracted pieces back together into one string.
o <- paste(n, collapse = "")
# Periods stand in for spaces; fixed() matches them literally.
p <- str_replace_all(o, fixed("."), " ")
p