library(stringr)

# Raw phone-book text: names and phone numbers are run together with no
# delimiter between entries.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Names: every run of two or more letters, periods, commas or spaces.
# The {2,} lower bound skips the single space inside "555 8904".
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Phone numbers: optional "(", optional 3-digit area code, optional ")",
# optional dash/space, 3 digits, optional dash/space, 4 digits.
phone <- unlist(
  str_extract_all(raw.data, "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}")
)
phone
## [1] "555-1239" "(636) 555-0113" "555-6542" "555 8904"
## [5] "636-555-3226" "5553642"

# data.frame() silently recycles the shorter vector when the lengths divide
# evenly, so make sure every name was paired with exactly one number.
stopifnot(length(name) == length(phone))
data.frame(name = name, phone = phone)
## name phone
## 1 Moe Szyslak 555-1239
## 2 Burns, C. Montgomery (636) 555-0113
## 3 Rev. Timothy Lovejoy 555-6542
## 4 Ned Flanders 555 8904
## 5 Simpson, Homer 636-555-3226
## 6 Dr. Julius Hibbert 5553642
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Turn "Last, First" entries into "First Last"; entries without a comma are
# left untouched because the pattern does not match them.
tempname <- str_replace_all(name, "^(.+), (.+)$", "\\2 \\1")
# Drop titles such as "Rev." / "Dr." (capital + one or two lowercase letters
# + period) and trim the space the removed title leaves behind.
tempname <- str_trim(str_replace_all(tempname, "[A-Z][a-z]{1,2}\\.", ""))
tempname
## [1] "Moe Szyslak" "C. Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"

# Create vectors with first name and last name.
# First name: the first run of at least two letters, so a leading initial
# such as "C." is skipped and "Montgomery" is used instead.
fname <- str_extract(tempname, "[[:alpha:]]{2,}")
# Last name: the final run of letters, anchored at the end of the string.
# Taking the first word after a space would return the middle/first name
# for entries like "C. Montgomery Burns".
lname <- str_extract(tempname, "[[:alpha:]]{2,}$")
fname
## [1] "Moe" "Montgomery" "Timothy" "Ned" "Homer"
## [6] "Julius"
lname
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson"
## [6] "Hibbert"
namedf <- data.frame(fname = fname, lname = lname)
namedf
## fname lname
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# TRUE where the entry carries a title. The period is escaped so it only
# matches a literal "." (an unescaped "." matches any character, which would
# flag names such as "Drew"), and \\b keeps the title at a word start.
hasTitle <- str_detect(name, "\\b(Rev|Dr)\\.")
hasTitle
## [1] FALSE FALSE TRUE FALSE FALSE TRUE

# TRUE where the entry contains a single-letter initial ("C."): one capital
# letter at a word boundary followed by a literal period.
hasSecondName <- str_detect(name, "\\b[A-Z]\\.")
hasSecondName
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# One or more digits immediately followed by a literal dollar sign,
# e.g. "555$" or the "12$" inside "ab12$USD".
pattern <- "[0-9]+\\$"
list <- c("555$", "ab12$USD", ".01$", "23213.121")
str_detect(string = list, pattern = pattern)
## [1] TRUE TRUE TRUE FALSE
# A stand-alone word made of one to four lowercase letters; the word
# boundaries on both sides reject longer words and letter/digit mixes.
pattern <- "\\b[a-z]{1,4}\\b"
list <- c(
  "a", "bb212", "xyzbc", "xz", "aaa", "12abcdefgh12a", "1234"
)
str_detect(string = list, pattern = pattern)
## [1] TRUE FALSE FALSE TRUE TRUE FALSE FALSE
# Any string that ends with the literal suffix ".txt".
pattern <- ".*?\\.txt$"
list <- c(
  "a.txt", "bb212", "xyzbc.txt", "xz", "aaa.txt", "12abcdefgh12a", "1234.txt"
)
str_detect(string = list, pattern = pattern)
## [1] TRUE FALSE TRUE FALSE TRUE FALSE TRUE
# Two digits, a slash, two digits, a slash, four digits: the shape of a
# dd/mm/yyyy or mm/dd/yyyy date. Only the shape is checked, not the value
# ranges, so "99/99/9999" matches as well.
pattern <- "\\d{2}/\\d{2}/\\d{4}"
list <- c(
  "10/10/2015 Check Date", "20202015", "someone2018/02/07", "99999999",
  "99/99/9999"
)
str_detect(string = list, pattern = pattern)
## [1] TRUE FALSE FALSE FALSE TRUE
# An opening tag, at least one character of content, and a closing tag that
# repeats (via the \\1 backreference) everything captured between the opening
# angle brackets. A tag with attributes therefore does not match, because
# its closing tag does not repeat the attributes.
pattern <- "<(.+?)>.+?</\\1>"
list <- c(
  "<b>name</b>", "<a href=testurl>link</a>", "324324 sdsadd 213213",
  "<xml><head></head></xml>", "99999999"
)
str_detect(string = list, pattern = pattern)
## [1] TRUE FALSE FALSE TRUE FALSE
##
# The hidden message is written in capital letters, so pulling out every
# uppercase character (together with the "." and "!" punctuation) reveals it.
message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# str_extract_all() returns a list with one element per input string;
# the input is a single string, so take that one element.
str_extract_all(message, "[[:upper:].!]")[[1]]
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
# Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.