# Raw phone-book text in which names and phone numbers run together.
# The literal is split across two source lines, so it carries an embedded
# newline; the name pattern below cannot match a newline, so the extracted
# names are not affected by it.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555
-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson,Homer5553642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas, or spaces.
# raw.data is a single string, so the first list element holds every match.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson,Homer" "Dr. Julius Hibbert"
(a) Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.
# Normalise every entry of `name` to the form "first_name last_name".
# Step 1: drop every run of letters that ends in a period (the titles "Rev."
# and "Dr." and the initial "C.") together with the whitespace that follows it.
without_abbrev <- str_replace_all(name, "\\b[[:alpha:]]+\\.\\s*", "")
# Step 2: entries written as "last,first" (with or without a space after the
# comma) are swapped; entries without a comma do not match and stay unchanged.
standard <- str_replace(
  without_abbrev,
  "^([[:alpha:]]+),\\s*([[:alpha:]]+)$",
  "\\2 \\1"
)
standard
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
(b) Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# A title is taken to be a run of letters at the very start of the entry
# that is closed by a period.
title_pattern <- "(^[a-zA-Z]{1,}\\.)"
has_tittle <- str_detect(string = name, pattern = title_pattern)
has_tittle
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# The titles that were found
unlist(str_extract_all(string = name, pattern = title_pattern))
## [1] "Rev." "Dr."
# Storage type of the result
typeof(has_tittle)
## [1] "logical"
(c) Construct a logical vector indicating whether a character has a second name.
# A second name is taken to be a run of letters, a period and one whitespace
# character sitting directly in front of the final word of the entry
# (the lookahead checks for that final word without consuming it).
second_name_pattern <- "[a-zA-Z]{1,}\\.\\s(?=([a-zA-Z]{1,})$)"
has_2name <- str_detect(string = name, pattern = second_name_pattern)
# The second names that were found
unlist(str_extract_all(string = name, pattern = second_name_pattern))
## [1] "C. "
typeof(has_2name)
## [1] "logical"
(a) [0-9]+\$
Matches one or more digits immediately followed by a literal $ sign; the match may occur anywhere in the string.
# TRUE: digits sit directly in front of a $ sign
str_detect(string = "6685$6", pattern = "[0-9]+\\$")
## [1] TRUE
# FALSE: there is a $ sign but no digit in front of it
str_detect(string = "$", pattern = "[0-9]+\\$")
## [1] FALSE
# FALSE: there are digits but no $ sign
str_detect(string = "6685", pattern = "[0-9]+\\$")
## [1] FALSE
(b)\b[a-z]{1,4}\b
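Matches whole words made up of 1 to 4 lowercase letters; the \b anchors require a word boundary on both sides, so lowercase runs inside longer or capitalised words do not match.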
# Extract every whole word consisting of one to four lowercase letters
str_extract_all(
  string = "I Am trying this reg Expression",
  pattern = "\\b[a-z]{1,4}\\b"
)
## [[1]]
## [1] "this" "reg"
(c).*?\.txt$
Matches a string that ends with the literal characters .txt; the lazy .*? consumes whatever precedes them, and the $ anchor requires .txt to be the last thing in the string.
# TRUE: the string ends in .txt
str_detect(string = "I am lewris.txt", pattern = ".*?\\.txt$")
## [1] TRUE
# FALSE: .txt occurs, but not at the end of the string
str_detect(
  string = "I am lewris.txt but not at the end",
  pattern = ".*?\\.txt$"
)
## [1] FALSE
(d) \d{2}/\d{2}/\d{4}
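Matches any occurrence of a date-like pattern (two digits, a slash, two digits, a slash, four digits, e.g. mm/dd/yyyy or dd/mm/yyyy) anywhere in a string. It does not check that the numbers form a valid date, and it can match inside a longer run of digits.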
# The first date is found inside a longer digit run; "1/2/2016" is skipped
# because its day and month have only one digit each.
str_extract_all(
  string = "5505/06/1886, 10/06/1885, 1/2/2016 ",
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
## [[1]]
## [1] "05/06/1886" "10/06/1885"
(e) <(.+?)>.+?</\1>
Matches any occurrence of an opening HTML tag, at least one character of content, and a closing tag with the same name (the backreference \1 repeats the captured tag name). Elements with no content are not matched. Tags nested inside a matched element are returned as part of that match.
# The element without content is skipped; the non-empty one is returned
str_extract_all(
  string = "<empty></empty>><tittle>aaaa</tittle>",
  pattern = "<(.+?)>.+?</\\1>"
)
## [[1]]
## [1] "<tittle>aaaa</tittle>"
# Nested tags come back as part of the enclosing element's match
str_extract_all(
  string = "<empty></empty>><tittle>aaaa</tittle><body><h1>trying html tags</h1></body>",
  pattern = "<(.+?)>.+?</\\1>"
)
## [[1]]
## [1] "<tittle>aaaa</tittle>"
## [2] "<body><h1>trying html tags</h1></body>"