library(stringr)

# Source text: phone numbers and names run together without separators.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Pull out every run of two or more letters, periods, commas or spaces.
# str_extract_all() returns one list element per input string; raw.data is a
# single string, so the first element holds all of the matches.
name <- str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")[[1]]
print(name)
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
First, I’ll separate the names that are already in "first_name last_name" order from those that need to be re-arranged.
# Names written as "Last, First" contain a comma; all others are already in
# "First Last" order. Detecting the comma, rather than hard-coding element
# positions, keeps the split correct if the extracted names ever change.
rightOrder <- name[!str_detect(name, fixed(","))]
wrongOrder <- name[str_detect(name, fixed(","))]
wrongOrder
## [1] "Burns, C. Montgomery" "Simpson, Homer"
Now, we change the order of the names that are not in the correct order.
# "Burns, C. Montgomery": split at the comma into surname and given names.
montgomery <- str_split(wrongOrder[1], ",")[[1]]
print(montgomery)
## [1] "Burns" " C. Montgomery"
# Strip the space that was left behind after the comma.
montgomery <- str_trim(montgomery)
print(montgomery)
## [1] "Burns" "C. Montgomery"
# Re-join the pieces with the given names first.
add1 <- str_c(montgomery[2], montgomery[1], sep = " ")
print(add1)
## [1] "C. Montgomery Burns"
# Same treatment for the second name, without printing the intermediate steps.
homer <- str_trim(str_split(wrongOrder[2], ",")[[1]])
add2 <- str_c(homer[2], homer[1], sep = " ")
Finally, we add these two corrected names to the vector that contains names with the right order.
# Append the two re-ordered names to the ones that were already correct.
rightOrder <- append(rightOrder, c(add1, add2))
print(rightOrder)
## [1] "Moe Szyslak" "Rev. Timothy Lovejoy" "Ned Flanders"
## [4] "Dr. Julius Hibbert" "C. Montgomery Burns" "Homer Simpson"
# A title is an abbreviation of two or more letters followed by a period
# (e.g. "Rev.", "Dr."). [[:alpha:]] is used instead of the class [aA-zZ]: the
# range A-z in that class also admits the punctuation characters
# [ \ ] ^ _ ` that sit between "Z" and "a" in ASCII.
hasTitle <- str_detect(rightOrder, "[[:alpha:]]{2,}\\.")
hasTitle
## [1] FALSE TRUE FALSE TRUE FALSE FALSE
# A second name shows up as a single capital-letter initial followed by a
# period (e.g. "C."). The leading \\b requires the capital to start a word, so
# the last letter of an upper-case abbreviation such as "DR." is not mistaken
# for an initial. A {1} quantifier would be redundant and is omitted.
hasSecondName <- str_detect(rightOrder, "\\b[A-Z]\\.")
hasSecondName
## [1] FALSE FALSE FALSE FALSE TRUE FALSE
[0-9]+\$ Strings that contain a run of one or more digits immediately followed by a literal dollar sign conform to this expression. Example: abc12847$a
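\b[a-z]{1,4}\b Strings that contain a standalone word of one to four lowercase letters (bounded on both sides by word boundaries) conform to this expression. Example: abc (but not abcde, which is too long to fit between the two boundaries)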
.*?\.txt$ Strings that end in .txt conform to this expression. Example: ok.txt
\d{2}/\d{2}/\d{4} Strings that contain two digits, a forward slash, two more digits, another forward slash, and lastly four digits conform to this expression. This is the shape of a date such as MM/DD/YYYY, although the digits themselves are not validated. Example: 02/13/2019
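<(.+?)>.+?</\1> Strings that contain an opening tag, at least one character of content, and a closing tag with the same name conform to this expression; the backreference \1 forces the closing tag to repeat exactly what was captured inside the opening tag. These are XML/HTML-style elements. Example: <address>This is correct syntax</address>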