Please deliver links to an R Markdown file (in GitHub and rpubs.com) with solutions to problems 3 and 4 from chapter 8 of Automated Data Collection in R. Problem 9 is extra credit. You may work in a small group, but please submit separately with names of all group participants in your submission.
Here is the referenced code for the introductory example in #3:
library(stringr)
# Raw phone-book text: phone numbers and names are run together with no
# delimiter between the entries.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is a run of letters, periods, commas, and spaces. Requiring at least
# two characters keeps the lone spaces inside phone numbers (e.g. "555 8904")
# from being picked up as names.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Separate the last names.
# The surname is the letter run that either ends the string ("First Last")
# or sits directly in front of a comma ("Last, First").
surname_with_delim <- str_extract(name, "[[:alpha:]]+($|,)")
# Keep only the letters, dropping the trailing comma when there is one.
last_name <- str_extract(surname_with_delim, "[[:alpha:]]+")
last_name
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
# Separate the first names.
# Expand the lone initial "C." to "Charles" so it is treated as a first name.
# The period is escaped and the pattern is anchored at a word boundary, so
# only a standalone "C." is replaced; an unescaped "C." would also rewrite
# any capital C followed by an arbitrary character (e.g. "Carl").
first_name <- str_replace(name, "\\bC\\.", "Charles")
# Take the first letter run that is followed by a space. Titles ("Rev.") and
# leading surnames ("Burns,") are skipped because punctuation follows them.
first_name <- str_extract(first_name, "[[:alpha:]]+ ")
# "Last, First" entries with nothing after the first name yield NA above;
# for those rows, fall back to the word that follows the punctuation mark.
needs_fallback <- is.na(first_name)
first_name[needs_fallback] <- str_extract(
  name[needs_fallback],
  "[[:punct:]]\\s[[:alpha:]]+"
)
# Strip the surrounding space or punctuation, leaving only the letters.
first_name <- str_extract(first_name, "[[:alpha:]]+")
# Make a data frame pairing each first name with its last name.
first_last <- data.frame(first_name, last_name)
first_last
## first_name last_name
## 1 Moe Szyslak
## 2 Charles Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# Does the name carry a title? A title is two or three letters followed by a
# period, so a one-letter initial such as "C." does not qualify.
has_title <- str_detect(name, "[[:alpha:]]{2,3}[.]")
has_title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# Does the name include a second name? Blank out the title first, then count
# the remaining words: more than two words means a second name is present.
name_without_title <- str_replace(name, "[[:alpha:]]{2,3}[.]", " ")
second_name <- str_count(name_without_title, "\\w+")
second_name > 2
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# One or more digits immediately followed by a literal dollar sign.
a <- "123456$"
digits_then_dollar <- str_extract_all(a, "[0-9]+\\$")
unlist(digits_then_dollar)
## [1] "123456$"
Strings of one to four lowercase letters that are bounded by word edges.
# Whole words made of one to four lowercase letters. "$weep" contributes
# "weep" because a word boundary falls between "$" and "w"; "table" is too
# long to match.
b <- c(
  "egg",
  "table",
  "$weep"
)
short_words <- str_extract_all(b, "\\b[a-z]{1,4}\\b")
unlist(short_words)
## [1] "egg" "weep"
Strings that end in “.txt”.
# Any (lazily matched) prefix followed by a literal ".txt" at the end of the
# string. The variable is named `c`; calls to c() still resolve to the base
# function because R looks up a function when a name is called.
c <- c(
  "egg.txt",
  "table",
  "$weep.txt"
)
txt_names <- str_extract_all(c, ".*?\\.txt$")
unlist(txt_names)
## [1] "egg.txt" "$weep.txt"
# Dates written as two digits, a slash, two digits, a slash, and four digits.
d <- c(
  "egg.txt",
  "04/19/1995",
  "$weep.txt",
  "02/28/1992"
)
slash_dates <- str_extract_all(d, "\\d{2}/\\d{2}/\\d{4}")
unlist(slash_dates)
## [1] "04/19/1995" "02/28/1992"
# An opening tag, some content, and the matching closing tag. The
# backreference \\1 forces the closing tag name to equal the opening one, so
# the unpaired "<.txt>" is not matched.
e <- c(
  "<b>egg.txt</b>",
  "<0>4/19/1995</0>",
  "<.txt>"
)
paired_tags <- str_extract_all(e, "<(.+?)>.+?</\\1>")
unlist(paired_tags)
## [1] "<b>egg.txt</b>" "<0>4/19/1995</0>"
# Hidden-message puzzle: the message is spelled by the uppercase letters of
# the scrambled text, with periods standing in for the spaces between words.
m <- "<clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Collect every run of uppercase letters and periods, in order of appearance.
# `m` holds a single string, so the first list element has all the matches.
n <- str_extract_all(m, "[[A-Z].]{1,}")[[1]]
# Join the fragments into one string with nothing between them.
o <- paste0(n, collapse = "")
# Turn the period separators into spaces to reveal the message.
p <- str_replace_all(o, "\\.", " ")
p
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"