# Extract character names from an unstructured phone-book string and derive
# simple facts about each name using stringr regular expressions.
library(stringr)

raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas, or spaces; the
# digits and punctuation of the phone numbers act as separators.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))

# Rearrange vector as first name last name: "Last, First" entries are split on
# ", " and the pieces reversed; names without a comma pass through unchanged.
# vapply() guarantees a character vector even if `name` is empty.
nameFirstLast <- vapply(
  strsplit(name, ", ", fixed = TRUE),
  function(parts) paste(rev(parts), collapse = " "),
  character(1)
)
print(nameFirstLast)
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"

# Logic vector whether a character has a title.
# The periods are escaped so that only a literal "Dr." or "Rev." counts
# (an unescaped "." would also match names such as "Drew"), and \\b keeps the
# title from matching in the middle of a longer word.
has_title <- str_detect(name, "\\b(Dr|Rev)\\.")
print(has_title)
## [1] FALSE FALSE TRUE FALSE FALSE TRUE

# Logic vector whether a character has a second name (abbreviated first name).
# An abbreviated name is a single capital letter followed by a period; the
# word boundary stops the last letter of an all-caps word from qualifying.
has_second_name <- str_detect(name, "\\b[A-Z]\\.")
print(has_second_name)
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# [0-9]+\$  One or more digits (0-9) immediately followed by a literal dollar
# sign. The backslash escapes "$", so here it is not an end-of-line anchor.
# Example: 111$
# \b[a-z]{1,4}\b  A whole word made up of one to four lowercase letters a-z
# (\b marks a word boundary on each side).
# Example: cat
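# .*?\.txt$  Any sequence of characters (possibly empty) followed by a literal
# ".txt" at the end of the line. The quantifier is lazy, but because of the "$"
# anchor the match must still extend to the ".txt" that ends the line.
# Example: blah.txt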
# \d{2}/\d{2}/\d{4}  Two digits 0-9, a forward slash, two digits 0-9, a forward
# slash, then four digits 0-9 (for example, a date).
# Example: 09/15/2019
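# <(.+?)>.+?</\1>  An opening tag whose name is one or more characters (matched
# lazily and captured), then one or more characters of content (also lazy),
# then a closing tag whose name repeats the captured opening tag through the
# backreference \1.
# Example: <b>text</b>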