# Raw phone-book text: names and phone numbers run together with no delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Extract every run of two or more letters, periods, commas or spaces.
# The {2,} lower bound skips the single spaces that occur inside the phone
# numbers. Base R (gregexpr + regmatches) is used so this chunk does not
# depend on an add-on package being attached.
names <- unlist(regmatches(raw.data, gregexpr("[[:alpha:]., ]{2,}", raw.data)))

# Drop every word that ends in a period: titles ("Rev.", "Dr.") and
# initials ("C.").
names1 <- gsub("\\w+\\.", "", names)

# Remove commas, then collapse the whitespace runs left behind by the removals
# and trim the ends, so no entry starts with a space or has a double space.
# NOTE(review): entries written as "Last, First" keep that word order; reorder
# them here if a uniform "First Last" format is wanted.
namesfinal <- trimws(gsub("\\s+", " ", gsub(",", "", names1, fixed = TRUE)))
namesfinal
## [1] "Moe Szyslak" "Burns Montgomery" "Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson Homer" "Julius Hibbert"

# Flag entries that carry a title. The period is escaped so that it matches a
# literal "." rather than any character.
has_title <- grepl("(Rev|Dr)\\.", names)
has_title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE

# Flag entries that contain an initial: an upper-case letter directly followed
# by a period.
has_second_name <- grepl("[[:upper:]]\\.", names)
has_second_name
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
To solve this problem, we remove from the original string a) lowercase letters, b) digits, and c) newline characters, and then d) replace each . with a space.
# Scrambled text: the hidden message is spelled by the upper-case letters and
# punctuation; lower-case letters, digits and line breaks are filler.
string <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Clean-up rules as (pattern, replacement) pairs, applied in this order.
steps <- list(
  c("[a-z]", ""),  # strip lower-case letters
  c("[0-9]", ""),  # strip digits
  c("\n", ""),     # join the four lines
  c("\\.", " ")    # periods become word separators
)

# Fold the rules over the text, keeping every intermediate result:
# element 1 is the untouched input, element k + 1 is the text after rule k.
stages <- Reduce(
  function(text, step) gsub(step[1], step[2], text),
  steps,
  accumulate = TRUE,
  init = string
)

string1 <- stages[[2]]
string2 <- stages[[3]]
string3 <- stages[[4]]
string4 <- stages[[5]]
string4
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"