library(stringr)
# Raw text in which phone numbers and character names run together with no
# separators between records.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Keep every run of two or more letters, commas, periods or spaces. The
# minimum length of two drops the lone spaces that occur inside the phone
# numbers. The input is a single string, so the first (and only) list element
# holds all matches.
# Note: this variable shares its name with base::names().
names <- str_extract_all(raw.data, "[[:alpha:],. ]{2,}")[[1]]
names
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Extract the first name from each entry of `names` in two passes.
#
# Pass 1 keeps the fragment that contains the first name:
#   - "[.,] <word>$"  a final word preceded by ". " or ", "
#                     (e.g. "Simpson, Homer", "Burns, C. Montgomery"), or
#   - "<word> "       a word directly followed by a space
#                     (e.g. "Moe Szyslak"; in "Rev. Timothy Lovejoy" the title
#                     is skipped because a period, not a space, follows it).
# [[:alpha:]] is used rather than [A-z]: the ASCII range A-z also covers the
# characters [ \ ] ^ _ and the backtick, so it is not a letters-only class.
firstNames <- unlist(
  str_extract_all(names, "[.,] [[:alpha:]]{2,}$|[[:alpha:]]{2,} ")
)
# Pass 2 strips the punctuation and spaces captured around the word.
firstNames <- unlist(str_extract_all(firstNames, "[[:alpha:]]{2,}"))
print(firstNames)
## [1] "Moe" "Montgomery" "Timothy" "Ned" "Homer"
## [6] "Julius"
# Extract the last name from each entry of `names` in two passes.
#
# Pass 1 keeps the fragment that contains the last name:
#   - "<c> <word>$"  a final word preceded by a space whose previous character
#                    <c> is neither "." nor "," (e.g. "Moe Szyslak"), or
#   - "<word>, "     a word directly followed by ", " (e.g. "Simpson, Homer").
# [[:alpha:]] replaces [A-z], whose ASCII range also covers [ \ ] ^ _ and the
# backtick; the negated class is written plainly as [^.,].
lastNames <- unlist(
  str_extract_all(names, "[^.,] [[:alpha:]]{2,}$|[[:alpha:]]{2,}, ")
)
# Pass 2 keeps only the word itself; the single leading character captured by
# the first alternative is too short to match {2,} and is dropped.
lastNames <- unlist(str_extract_all(lastNames, "[[:alpha:]]{2,}"))
print(lastNames)
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
# A title is a word of two or more letters immediately followed by a period
# (single-letter initials such as "C." are therefore not titles).
# [[:alpha:]] replaces [A-z], whose ASCII range also covers [ \ ] ^ _ and the
# backtick.
title_pattern <- "[[:alpha:]]{2,}\\."
titles <- unlist(str_extract_all(names, title_pattern))
print(titles)
## [1] "Rev." "Dr."
# Flag each name by matching it against the title pattern itself. Matching
# against the `titles` vector would pair a length-6 vector with a length-2
# pattern vector: the result would then depend on where the titled names
# happen to sit, and each title would be treated as a regex in which "."
# matches any character.
data.frame(
  "Name" = names,
  "Title Exists" = str_detect(names, title_pattern)
)
## Name Title.Exists
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert TRUE
# Find names that carry a second (middle) name: after a space, a single letter
# with an optional period, another space, then a word with an optional period
# (e.g. " C. Montgomery").
# [[:alpha:]] replaces [A-z], whose ASCII range also covers [ \ ] ^ _ and the
# backtick.
secondNames <- unlist(
  str_extract_all(names, " [[:alpha:]]\\.? [[:alpha:]]+\\.?")
)
# Re-extract without the leading space that anchored the first match.
secondNames <- unlist(
  str_extract_all(secondNames, "[[:alpha:]]\\.? [[:alpha:]]+\\.?")
)
print(secondNames)
## [1] "C. Montgomery"
# Regex demo: one or more digits followed by a literal dollar sign.
string <- c(
  "", "01\\", "0012$", "02020",
  "738372", "34384782347832", "34"
)
str_detect(string = string, pattern = "[0-9]+\\$")
## [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE
# Regex demo: a run of one to four lowercase letters delimited by word
# boundaries on both sides.
string <- c(
  "a", "bc", "def", "ghij",
  "klmno", "02020", "34384782347832", "34"
)
str_detect(string = string, pattern = "\\b[a-z]{1,4}\\b")
## [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
# Regex demo: any (possibly empty) prefix followed by a literal ".txt" at the
# end of the string.
string <- c(
  "a", "bc", "def", "ghij.",
  "klmn.txt", "*.txt", "pqr874238743.txt", ".txt"
)
str_detect(string = string, pattern = ".*?\\.txt$")
## [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
# Regex demo: two digits, a slash, two digits, a slash, then four digits. The
# pattern is not anchored, so it may match anywhere inside a string.
string <- c(
  "1/1/2016", "02/02/2016", "20/20/0000", "1/1/16",
  "1/13/293393", "34384782347832", "34"
)
str_detect(string = string, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] FALSE TRUE TRUE FALSE FALSE FALSE FALSE
# Regex demo: an opening tag, some content, and a closing tag whose name is
# forced by the backreference \1 to equal the captured opening tag name.
string <- c(
  "<html>Hello World</html>",
  "<html>Hello World<html>",
  "34384782347832",
  "34"
)
str_detect(string = string, pattern = "<(.+?)>.+?</\\1>")
## [1] TRUE FALSE FALSE FALSE
# Scrambled text: the hidden message is spelled by its uppercase letters and
# punctuation marks; lowercase letters and digits are filler.
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hprfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPalotfb7wEm24k6t3sR9zqe5fy89n6N5t9kc4fE905gmc4Rgxo5nhDk!gr"
print(secret_message)
## [1] "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hprfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPalotfb7wEm24k6t3sR9zqe5fy89n6N5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Extract every uppercase or punctuation character in order of appearance
# (single input string, hence the first list element) and glue them together.
paste0(
  str_extract_all(secret_message, "[:upper:]|[:punct:]")[[1]],
  collapse = ""
)
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD!"