# Unstructured text in which names and phone numbers run together; it is the
# input for the extraction exercises below. The literal is split across
# several lines only to keep the source readable.
raw.data <- paste0(
  "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery",
  "555-6542Rev. Timothy Lovejoy555 8904Ned Flanders",
  "636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"
)
library(stringr)

Exercise 3

Copy the introductory example. The vector name stores the extracted names.

# Every run of two or more letters, periods, commas, or spaces is treated as
# a name. raw.data is a single string, so the first (and only) element of the
# list returned by str_extract_all() holds all of the matches.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

3.1. Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name

# Rearrange every entry of `name` into "first_name last_name" form.
#
# First name: grab either the first run of letters that is directly followed
# by a space ("Moe ", "Timothy "; a title such as "Rev." is skipped because it
# is followed by a period) or, for "Last, First" entries, the comma and
# everything after it. The second extraction keeps the text from the first
# capital letter through the nearest following run of lower-case letters,
# which drops the leading ", " and the trailing space.
first_part <- str_extract(name, "[:alpha:]{2,} |, [:print:]{2,}")
FirstName <- str_extract(first_part, "[A-Z](.+?)[a-z]+")
FirstName
## [1] "Moe"           "C. Montgomery" "Timothy"       "Ned"          
## [5] "Homer"         "Julius"

# Last name: grab either the final word together with the letter and space in
# front of it ("e Szyslak") or, for "Last, First" entries, everything up to
# and including the comma ("Burns,"). The second extraction keeps the first
# letter that is followed by one or more lower-case letters. [A-Za-z] is used
# rather than [A-z], because the A-z range also covers the punctuation
# characters [ \ ] ^ _ ` that sit between "Z" and "a" in ASCII.
last_part <- str_extract(name, "[A-Za-z] [:alpha:]{2,}[a-z]$|[:print:]{2,},")
LastName <- str_extract(last_part, "[A-Za-z][a-z]+")
LastName
## [1] "Szyslak"  "Burns"    "Lovejoy"  "Flanders" "Simpson"  "Hibbert"

# Join the two pieces with a single space and show the result as a table.
Full_Name <- str_c(FirstName, LastName, sep = " ")
data.frame(Full_Name)
##             Full_Name
## 1         Moe Szyslak
## 2 C. Montgomery Burns
## 3     Timothy Lovejoy
## 4        Ned Flanders
## 5       Homer Simpson
## 6      Julius Hibbert

3.2. Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.)

# TRUE for every name that contains the title "Rev." or "Dr.". str_detect()
# returns one logical value per element of `name`, which is what the exercise
# asks for; the periods are escaped so they match a literal "." rather than
# any character.
title <- str_detect(name, "Rev\\.|Dr\\.")
title
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

3.3 Construct a logical vector indicating whether a character has a second name

# TRUE for every name that contains a second name, recognised here as a
# space, a single capital letter, and a period (for example " C."). Using
# str_detect() yields one logical value per element of `name`, as the
# exercise requires, instead of only the matched text.
sec_name <- str_detect(name, " [A-Z]\\.")
sec_name
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

Exercise 4.

Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

 ## a. "[0-9]+\\$" matches one or more digits followed by a literal dollar sign.
## [0-9] is equivalent to [:digit:] or \d, i.e. the digits 0 through 9.
string1 <- "[0-9]+\\$"
example1 <- c("123456$", "123$")
str_detect(string = example1, pattern = string1)
## [1] TRUE TRUE
 ## b. \b marks a word edge, which pins down where a match may start and end.
## "\\b[a-z]{1,4}\\b" finds a whole word made of one to four lower-case letters.
string2 <- "\\b[a-z]{1,4}\\b"
example2 <- c("gym", "bug", "ball")
str_detect(string = example2, pattern = string2)
## [1] TRUE TRUE TRUE
 ## c. ".*?\\.txt$" matches any string that ends with the literal text ".txt".
string3 <- ".*?\\.txt$"
example3 <- c("book.txt", "bear56.txt")
str_detect(string = example3, pattern = string3)
## [1] TRUE TRUE
 ## d. "\\d{2}/\\d{2}/\\d{4}" matches digits laid out as 00/00/0000:
## two digits, a slash, two digits, a slash, and four digits.
string4 <- "\\d{2}/\\d{2}/\\d{4}"
example4 <- c("08/26/2019", "09/03/2019 School time")
str_detect(string = example4, pattern = string4)
## [1] TRUE TRUE
 ## e. "<(.+?)>.+?</\\1" matches an opening tag in an HTML document, some
## content, and the start of a closing tag with the same name; the
## backreference \1 repeats whatever the first group captured.
string5 <- "<(.+?)>.+?</\\1"
example5 <- c("Weather<tag>is<tag>beautiful", "<tag>I love school</tag>")
# Test this exercise's own pattern and examples. The first example has no
# closing tag, so only the second one matches.
str_detect(example5, string5)
## [1] FALSE  TRUE

Exercise 5

Rewrite the expression [0-9]+\$ in a way that all elements are altered but the expression performs the same task

# Original expression: one or more digits followed by a literal dollar sign.
case1 <- "[0-9]+\\$"
example <- c("7$", "123$")
str_detect(example, case1)
## [1] TRUE TRUE
# Rewritten so that every element is altered while the match stays the same:
# [0-9] becomes \d, the + quantifier becomes {1,}, and the escaped \$ becomes
# the single-character class [$].
case2 <- "\\d{1,}[$]"
str_detect(example, case2)
## [1] TRUE TRUE

Exercise 9

# Input string for this exercise. The call below lists every alphabetic
# character it contains, in order of appearance; `code` is a single string, so
# the first element of the list returned by str_extract_all() has them all.
code <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
str_extract_all(code, "[:alpha:]")[[1]]
##   [1] "c" "l" "c" "o" "p" "C" "o" "w" "z" "m" "s" "t" "c" "d" "w" "n" "k"
##  [18] "i" "g" "O" "v" "d" "i" "c" "p" "N" "u" "g" "g" "v" "h" "r" "y" "n"
##  [35] "G" "j" "u" "w" "c" "z" "i" "h" "q" "r" "f" "p" "R" "x" "s" "A" "j"
##  [52] "d" "w" "p" "n" "T" "a" "n" "w" "o" "U" "w" "i" "s" "d" "i" "j" "L"
##  [69] "j" "k" "p" "f" "A" "T" "I" "d" "r" "c" "o" "c" "b" "t" "y" "c" "z"
##  [86] "j" "a" "t" "O" "a" "o" "o" "t" "j" "t" "N" "j" "n" "e" "c" "S" "f"
## [103] "e" "k" "r" "w" "Y" "w" "w" "o" "j" "i" "g" "O" "d" "v" "r" "f" "U"
## [120] "r" "b" "z" "b" "k" "A" "n" "b" "h" "z" "g" "v" "R" "i" "z" "E" "c"
## [137] "r" "o" "p" "w" "A" "g" "n" "b" "S" "q" "o" "U" "f" "P" "a" "o" "t"
## [154] "f" "b" "w" "E" "m" "k" "t" "s" "R" "z" "q" "e" "f" "y" "n" "N" "d"
## [171] "t" "k" "c" "f" "E" "g" "m" "c" "R" "g" "x" "o" "n" "h" "D" "k" "g"
## [188] "r"
# Keep only the upper-case letters, periods, and exclamation marks of `code`.
code1 <- str_extract_all(code, "[[:upper:].!]")[[1]]
code1
##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
# Glue the characters back into one string, then turn each period into two
# spaces so the words of the message are separated.
code2 <- str_c(code1, collapse = "")
Secret_message <- str_replace_all(code2, "[\\.]", "  ")
Secret_message
## [1] "CONGRATULATIONS  YOU  ARE  A  SUPERNERD!"