Week 3 Assignment


Please deliver links to an R Markdown file (in GitHub and rpubs.com)
Problem 9 is extra credit.


R-Datacollection GitHub

3. Copy the introductory example. The vector `name` stores the extracted names.


library(stringr)
library(knitr)

# Phone-book text in which names and phone numbers are run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is a run of two or more letters, periods, commas or spaces.
name_pattern <- "[[:alpha:]., ]{2,}"
# A phone number is an optional (possibly parenthesised) three-digit prefix,
# then three digits and four digits, each optionally separated by "-" or " ".
phone_pattern <- "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}"

# raw.data is a single string, so the first list element holds every match.
name <- str_extract_all(raw.data, name_pattern)[[1]]
phone <- str_extract_all(raw.data, phone_pattern)[[1]]
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
kable(data.frame(name, phone))
name phone
Moe Szyslak 555-1239
Burns, C. Montgomery (636) 555-0113
Rev. Timothy Lovejoy 555-6542
Ned Flanders 555 8904
Simpson, Homer 636-555-3226
Dr. Julius Hibbert 5553642

3 (a). Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.


# Step 1: drop a single-letter middle initial such as " C. ".
# [[:alpha:]] is used rather than the range [A-z], because A-z also covers the
# punctuation characters [ \ ] ^ _ ` that sit between "Z" and "a" in ASCII.
name2 <- sub(" [[:alpha:]]\\. ", " ", name)
# Step 2: turn "Last, First" into "First Last".
name3 <- sub("(\\w+),\\s(\\w+)", "\\2 \\1", name2)
# Step 3: strip a two- or three-letter title followed by a period
# (e.g. "Dr. ", "Rev. ").
first_and_lastname <- sub("[[:alpha:]]{2,3}\\. ", "", name3)
kable(data.frame(first_and_lastname))
first_and_lastname
Moe Szyslak
Montgomery Burns
Timothy Lovejoy
Ned Flanders
Homer Simpson
Julius Hibbert

3 (b). Construct a logical vector indicating whether a character has a title.


# TRUE when the name carries a title: two or three letters followed by a
# period and a space (e.g. "Dr. ", "Rev. ").
# [[:alpha:]] replaces [A-z], which would also match [ \ ] ^ _ and `.
title <- str_detect(name3, "[[:alpha:]]{2,3}\\. ")
df <- data.frame(name3, title)
# df is already a data frame, so it is passed to kable() directly.
kable(df)
name3 title
Moe Szyslak FALSE
Montgomery Burns FALSE
Rev. Timothy Lovejoy TRUE
Ned Flanders FALSE
Homer Simpson FALSE
Dr. Julius Hibbert TRUE
`

3 (c). Construct a logical vector indicating whether a character has a second name.


# TRUE when the name contains a second (middle) name, recognised here as a
# single-letter initial surrounded by spaces, e.g. " C. ".
# [[:alpha:]] replaces [A-z], which would also match [ \ ] ^ _ and `.
secondname <- str_detect(name, " [[:alpha:]]\\. ")
df <- data.frame(name, secondname)
df
##                   name secondname
## 1          Moe Szyslak      FALSE
## 2 Burns, C. Montgomery       TRUE
## 3 Rev. Timothy Lovejoy      FALSE
## 4         Ned Flanders      FALSE
## 5       Simpson, Homer      FALSE
## 6   Dr. Julius Hibbert      FALSE
`

4. Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

4 (a). [0-9]+\$ # One or more digits followed by a literal $ symbol.

library(stringr)
# Sample strings: digits immediately followed by a dollar sign.
example <- c("12$", "45$")
# One or more digits, then an escaped (literal) "$".
digits_then_dollar <- "[0-9]+\\$"
str_detect(example, digits_then_dollar)
## [1] TRUE TRUE
`

4 (b). \b[a-z]{1,4}\b # A whole word made of 1 to 4 lowercase letters.

# Sample words with two and four lowercase letters.
example2 <- c("ok", "four")
# Word boundary, 1-4 lowercase letters, word boundary.
short_word <- "\\b[a-z]{1,4}\\b"
str_detect(example2, short_word)
## [1] TRUE TRUE
`

4 (c) .*?\.txt$ #string ends with .txt

# Any (possibly empty) prefix followed by a literal ".txt" at the end of the
# string. Assignments use `<-` to match the rest of the file.
pattern3 <- ".*?\\.txt$"
# Sample file names, including one that is only the extension.
example3 <- c(".txt", "123.txt", "abc.txt", "a$b#1.txt")
str_detect(example3, pattern3)
## [1] TRUE TRUE TRUE TRUE
`

4 (d). \d{2}/\d{2}/\d{4} # Digits in the format nn/nn/nnnn, e.g. a date.

# Sample strings in nn/nn/nnnn form.
example4 <- c("01/09/1991", "08/14/1965")
# Two digits, slash, two digits, slash, four digits.
slash_date <- "\\d{2}/\\d{2}/\\d{4}"
str_detect(example4, slash_date)
## [1] TRUE TRUE
`

4 (e). <(.+?)>.+?</\1> # An opening tag <name>, at least one character of content, then the matching closing tag </name>; the backreference \1 repeats the tag name.

# Sample string: content wrapped in a matching opening and closing tag.
example5 <- "<tag>Text</tag>"
# "\\1" must repeat whatever the first group captured as the tag name.
paired_tag <- "<(.+?)>.+?</\\1>"
str_detect(example5, paired_tag)
## [1] TRUE
`

Problem 9

The following code hides a secret message.

# Scrambled text: the uppercase letters spell the message and the periods
# mark where the spaces go.
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the runs of capitals and periods; there is a single input string,
# so the first list element holds every match.
kept_runs <- str_extract_all(secret_message, "[[:upper:].]{1,}")[[1]]
# Join the runs, then turn each period into a space.
joined_runs <- paste(kept_runs, collapse = '')
message <- str_replace_all(joined_runs, "[.]", " ")
message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"
# A second scrambled string, decoded the same way: keep capitals and periods,
# then turn periods into spaces.
the_end <- "3nc4gxo5nhk!gr.Tanwor1w1H4dr3cocfE9.2bk4fE89n6N5nhDk!."
# Single input string, so the first list element holds every match.
end_runs <- str_extract_all(the_end, "[[:upper:].]{1,}")[[1]]
the_end <- str_replace_all(paste(end_runs, collapse = ''), "[.]", " ")
the_end
## [1] " THE END "



Please email kleber.perez@live.com with any suggestions.

    Data607 Week 3 Assignment - MSDS 2019 Program.