Please deliver links to an R Markdown file (in GitHub and rpubs.com) with solutions to problems 3 and 4 from chapter 8 of Automated Data Collection in R. Problem 9 is extra credit. You may work in a small group, but please submit separately with names of all group participants in your submission. Here is the referenced code for the introductory example in #3:
library(stringr)
# Unstructured source text: names and phone numbers run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# Extract every run of two or more letters, periods, commas, or spaces.
# str_extract_all() returns a list, so flatten it into a character vector.
name <- unlist(
  str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")
)
# Show the extracted names
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
R> name
[1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy" [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
1.- Use tools of this chapter to rearrange the vector so that all the elements conform to the standard
first_name last_name
# Rewrite "last_name, first_name" entries as "first_name last_name".
#   ^([^,]+)  captures everything before the comma (the last name)
#   ,\\s*     consumes the comma and any whitespace after it
#   (.+)$     captures the rest of the string (the first name part)
# The replacement "\\2 \\1" swaps the two captured groups. Entries without a
# comma do not match the pattern, so str_replace() returns them unchanged.
# The result is a plain character vector with one string per person; titles
# such as "Rev." and "Dr." are kept as part of the name.
rearrange_names <- str_replace(name, "^([^,]+),\\s*(.+)$", "\\2 \\1")
rearrange_names
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
2.- Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.)
# Flag names that contain a title: two or more letters directly followed by
# a period (e.g. "Rev." or "Dr."). A single-letter initial does not qualify.
title <- str_detect(
  string = name,
  pattern = "[[:alpha:]]{2,}\\."
)
title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
3.- Construct a logical vector indicating whether a character has a second name
# Flag names that contain an abbreviated second name: an uppercase letter
# directly followed by a period (e.g. the "C." in "Burns, C. Montgomery").
secondname <- str_detect(
  string = name,
  pattern = "[A-Z]\\.{1}"
)
secondname
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
1.-[0-9]+\$
Matches one or more digits (0-9) immediately followed by a literal dollar sign
# Demo: a run of digits followed by an escaped (literal) dollar sign.
example <- "6729$"
regex <- "[0-9]+\\$"
str_extract(string = example, pattern = regex)
## [1] "6729$"
2.-\b[a-z]{1,4}\b
Matches a word made up of one to four lowercase letters (a-z), with a word boundary on either side
# Demo: str_extract() returns only the first match, so just the first
# one-to-four-letter lowercase word is shown.
example <- "abcd efgh"
regex <- "\\b[a-z]{1,4}\\b"
str_extract(string = example, pattern = regex)
## [1] "abcd"
3.-.*?\.txt$
Matches any string that ends in .txt (the period is escaped, so it is a literal dot)
# Demo: any characters followed by a literal ".txt" at the end of the string.
example <- "abcd.txt"
regex <- ".*?\\.txt$"
str_extract(string = example, pattern = regex)
## [1] "abcd.txt"
4.-\d{2}/\d{2}/\d{4}
Matches date-like strings of two digits, two digits, and four digits separated by forward slashes (e.g. mm/dd/yyyy)
# Demo: the pattern is not anchored, so the first dd/dd/dddd substring is
# extracted and the trailing fifth digit of the year is left out.
example <- "01/17/19889"
regex <- "\\d{2}/\\d{2}/\\d{4}"
str_extract(string = example, pattern = regex)
## [1] "01/17/1988"
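5.-<(.+?)>.+?</\1>
Matches an HTML-style element: an opening tag, some content, and a closing tag with the same name (the backreference \1 repeats the captured tag name)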
# Demo: <Title> has no matching </Title>, so the first element whose opening
# and closing tag names agree is <body>...</body>.
example <- "<Title>Sometext</head><body>Sometext</body>"
regex <- "<(.+?)>.+?</\\1>"
str_extract(string = example, pattern = regex)
## [1] "<body>Sometext</body>"
# Scrambled string for the extra-credit problem.
extra_credit <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Echo the raw string
extra_credit
## [1] "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the uppercase letters; read in order, they spell the hidden message.
str_extract_all(string = extra_credit, pattern = "[[:upper:]]")
## [[1]]
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"
INDEED!