Week 3

Homework 3

library(stringr)

# Phone-book text in which names and phone numbers run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
raw.data
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas or spaces.
# str_extract_all() returns one list element per input string; there is a
# single input string here, so its first element holds every match.
name <- str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")[[1]]

(a). Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.

# Step 1: drop a middle initial such as " C. " so only two name parts remain.
# [[:alpha:]] is used instead of the range A-z, because A-z also matches the
# punctuation characters that sit between "Z" and "a" in ASCII order
# ([ \ ] ^ _ `), and character ranges are locale-dependent in R.
name2 <- sub(" [[:alpha:]]\\. ", " ", name)
# Step 2: turn "Last, First" into "First Last".
name3 <- sub("(\\w+),\\s(\\w+)", "\\2 \\1", name2)
# Step 3: strip a two- or three-letter title followed by a period and a space
# (e.g. "Dr. ", "Rev. ").
name4 <- sub("[[:alpha:]]{2,3}\\. ", "", name3)
name4
## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy" 
## [4] "Ned Flanders"     "Homer Simpson"    "Julius Hibbert"

(b). Construct a logical vector indicating whether a character has a title.

# TRUE where the name contains a two- or three-letter abbreviation followed by
# a period and a space (e.g. "Dr. ", "Rev. "). [[:alpha:]] replaces the range
# A-z, which would also accept the punctuation between "Z" and "a"
# ([ \ ] ^ _ `).
title <- str_detect(name3, "[[:alpha:]]{2,3}\\. ")
# Show each name next to its title flag.
df <- data.frame(name3, title)
df
##                  name3 title
## 1          Moe Szyslak FALSE
## 2     Montgomery Burns FALSE
## 3 Rev. Timothy Lovejoy  TRUE
## 4         Ned Flanders FALSE
## 5        Homer Simpson FALSE
## 6   Dr. Julius Hibbert  TRUE

(c). Construct a logical vector indicating whether a character has a second name.

# TRUE where the raw name contains a single-letter initial followed by a
# period, with a space on either side (e.g. " C. "). [[:alpha:]] replaces the
# range A-z, which would also accept the punctuation between "Z" and "a"
# ([ \ ] ^ _ `).
secondname <- str_detect(name, " [[:alpha:]]\\. ")
# Show each raw name next to its second-name flag.
df <- data.frame(name, secondname)
df
##                   name secondname
## 1          Moe Szyslak      FALSE
## 2 Burns, C. Montgomery       TRUE
## 3 Rev. Timothy Lovejoy      FALSE
## 4         Ned Flanders      FALSE
## 5       Simpson, Homer      FALSE
## 6   Dr. Julius Hibbert      FALSE
  1. Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

(a). `[0-9]+\$` One or more digits followed by a literal $ symbol.
library(stringr)
# Both strings are a run of digits followed by a literal dollar sign.
example <- c("12$", "45$")
str_detect(string = example, pattern = "[0-9]+\\$")
## [1] TRUE TRUE

(b). `\b[a-z]{1,4}\b` A word made up of one to four lowercase letters.

# Two lowercase words, each between one and four letters long.
example2 <- c("ok", "four")
str_detect(string = example2, pattern = "\\b[a-z]{1,4}\\b")
## [1] TRUE TRUE

(c). `.*?\.txt$` Any string that ends with the literal suffix .txt.

# File-name-like strings that both end in ".txt".
example3 <- c("abc.txt", "1.txt")
str_detect(string = example3, pattern = ".*?\\.txt$")
## [1] TRUE TRUE

(d). `\d{2}/\d{2}/\d{4}` Digits in the format nn/nn/nnnn, for example a date written as mm/dd/yyyy.

# Two strings laid out as two digits, two digits and four digits, separated
# by slashes.
example4 <- c("01/09/1991", "08/14/1965")
str_detect(string = example4, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] TRUE TRUE

(e). `<(.+?)>.+?</\1>` An opening tag in angle brackets, followed by some text, followed by a closing tag (with a /) whose name repeats the opening tag's name through the back-reference \1.

# A single element whose closing tag name repeats the opening tag name.
example5 <- "<tag>Text</tag>"
str_detect(string = example5, pattern = "<(.+?)>.+?</\\1>")
## [1] TRUE
  1. The following code hides a secret message.
# The literal spans several lines, so it contains newline characters; the
# extraction pattern below never matches them.
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Keep only runs of uppercase letters and periods. There is a single input
# string, so the first list element holds every match.
message <- str_extract_all(
  string = secret_message,
  pattern = "[[:upper:].]{1,}"
)[[1]]

# Glue the fragments together, then turn each period into a space.
message <- str_replace_all(
  string = paste(message, collapse = ""),
  pattern = "[.]",
  replacement = " "
)
message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"