Exercise 3

# Raw phone-book text: names and phone numbers run together with no delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

library(stringr)

# Names: runs of two or more letters, periods, commas, or spaces.
# raw.data is a single string, so the first list element holds every match.
name <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]

name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
# Phone numbers: an optional 3-digit area code (optionally parenthesised),
# then 3 digits and 4 digits, each part optionally separated by "-" or " ".
phone <- str_extract_all(
  string = raw.data,
  pattern = "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}"
)[[1]]
phone
## [1] "555-1239"       "(636) 555-0113" "555-6542"       "555 8904"      
## [5] "636-555-3226"   "5553642"
# Show the extracted names as a one-column data frame.
data.frame(name = name)
# First names: either the final word when it follows a punctuation mark and a
# space (e.g. ", Homer"), or a word of two or more letters followed by a space.
fname <- unlist(str_extract_all(
  string = name,
  pattern = "[:punct:] [[:alpha:]]{2,}$|[[:alpha:]]{2,} "
))
# Keep only the letters, dropping the punctuation/space captured above.
first_name <- unlist(str_extract_all(
  string = fname,
  pattern = "[[:alpha:]]{2,}"
))
first_name
## [1] "Moe"        "Montgomery" "Timothy"    "Ned"        "Homer"     
## [6] "Julius"
# Last names: either the final word when it follows a non-punctuation
# character and a space, or a word followed by a comma and a space.
lname <- unlist(str_extract_all(
  string = name,
  pattern = "[^[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,}, "
))
# Keep only the run of two or more letters (drops the single leading
# character, the space, and the comma).
last_name <- unlist(str_extract_all(
  string = lname,
  pattern = "[[:alpha:]]{2,}"
))
last_name
## [1] "Szyslak"  "Burns"    "Lovejoy"  "Flanders" "Simpson"  "Hibbert"
# Show first and last names side by side.
data.frame(first_name = first_name, last_name = last_name)

Construct a logical vector indicating whether a character has a title

# Extract the titles themselves: two or more letters followed by a period.
# A single-letter initial such as "C." does not qualify as a title.
title <- unlist(str_extract_all(name, "[[:alpha:]]{2,}\\."))
title
## [1] "Rev." "Dr."
# Flag, per name, whether it contains a title.
# The title regex is matched against every name directly. Passing the
# length-2 `title` vector as the pattern would pair names and titles by
# position (giving the right answer only by coincidence of ordering), is
# rejected as a size mismatch by newer stringr versions, and would treat the
# "." in each title as a regex wildcard.
title_detect <- str_detect(name, "[[:alpha:]]{2,}\\.")
title_detect
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE
# Show the names together with the title flag.
data.frame(
  first_name = first_name,
  last_name = last_name,
  title_detect = title_detect
)

Construct a logical vector to indicate whether a character has a middle name

# A middle name is flagged when a name contains: a space, a single letter with
# an optional period, another space, then one or more letters with an optional
# period (e.g. " C. Montgomery").
second_name <- str_detect(
  string = name,
  pattern = " [[:alpha:]]{1}\\.? [[:alpha:]]{1,}\\.?"
)
second_name
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE
# Show the names together with both flags.
data.frame(
  first_name = first_name,
  last_name = last_name,
  title_detect = title_detect,
  second_name = second_name
)

Describe the types of strings that conform to the following regular expressions and construct an example that is matched by each regular expression.

library(stringr)
# Matches one or more digits immediately followed by a literal dollar sign.
pat <- "[0-9]+\\$"
ex <- c("56$", "bill67$balloon", "12345$")
nber_detect <- str_detect(string = ex, pattern = pat)
nber_detect
## [1] TRUE TRUE TRUE
# Matches a whole word (delimited by word boundaries) made of one to four
# lowercase letters.
pattern <- "\\b[a-z]{1,4}\\b"
ex <- c("h", "sde", "wxyz")
letter_detect <- str_detect(string = ex, pattern = pattern)
letter_detect
## [1] TRUE TRUE TRUE
# Matches any string that ends with the literal suffix ".txt".
pat <- ".*?\\.txt$"
ex <- c("file.txt", "abc.txt", "456abc.txt")
ending_detect <- str_detect(string = ex, pattern = pat)
ending_detect
## [1] TRUE TRUE TRUE
# Matches digits laid out as nn/nn/nnnn (two digits, slash, two digits, slash,
# four digits), anywhere in the string.
patrn <- "\\d{2}/\\d{2}/\\d{4}"
ex <- c("12/08/3456", "30/30/3000", "02/09/2019 Excellent")
format_detect <- str_detect(string = ex, pattern = patrn)
format_detect
## [1] TRUE TRUE TRUE
# Matches an opening tag <x>, at least one character of content, and a closing
# tag </x> whose name repeats the opening tag's (back-reference \\1).
pattern <- "<(.+?)>.+?</\\1>"
ex <- c("<tag>Text</tag>", "<html>Hello world</html>")
el_detect <- str_detect(string = ex, pattern = pattern)
el_detect
## [1] TRUE TRUE

Decode the secret message

# Scrambled text; the hidden message is spelled by its uppercase letters.
mes <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Pull out every uppercase letter; the result is a list with one character
# vector per input string (here a single element).
decode <- str_extract_all(string = mes, pattern = "[[:upper:]]")

decode
## [[1]]
##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"