R> name
[1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
[4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
library(stringr)
# Phone numbers and names of six characters, run together in a single string.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas or spaces.
# raw.data is a single string, so the first (and only) list element returned
# by str_extract_all() already holds every match.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
# Same extraction, spelling the letters out as explicit ASCII ranges instead
# of the POSIX [:alpha:] class.
name2 <- str_extract_all(raw.data, "[A-Za-z ,.]{2,}")[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Extract the first name of every entry in `name`.
#
# Step 1 grabs the first name together with a neighbouring delimiter:
#   * "<punct> <letters>" at the end of the string covers the
#     "Last, First" / "Last, I. First" layouts, or
#   * "<letters> " (a word followed by a space) covers "First Last" and
#     "Title. First Last" (the title is skipped because it is followed by a
#     period, not a space).
# Step 2 strips the delimiter, keeping only the word itself.
#
# str_extract() (first match per element) is used instead of
# unlist(str_extract_all()) so the result always has exactly one entry per
# name -- NA when nothing matches -- and stays aligned with `name`.
# [[:punct:]] is written in its bracketed form, matching the style used for
# the last-name pattern.
first_name <- str_extract(name, "[[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,} ")
first_name <- str_extract(first_name, "\\w+")
first_name
## [1] "Moe" "Montgomery" "Timothy" "Ned" "Homer"
## [6] "Julius"
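# Alternative patterns written without POSIX character classes (draft, not run).
# Note: the backslash must be doubled inside an R string ("\\w", not "\w").
#first_name2 <- unlist(str_extract_all(name,"[,.] [A-Za-z]{2,}|\\w{2,}"))
#first_name2 <- unlist(str_extract_all(first_name2,"\\w+"))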
# Extract the last name of every entry in `name`.
#
# Step 1 grabs the last name together with a neighbouring delimiter:
#   * "<non-punct> <letters>" at the end of the string covers "First Last"
#     (the non-punctuation guard rejects ", First" and ". First" endings), or
#   * "<letters>, " covers the "Last, First" layout.
# Step 2 keeps the first run of two or more letters, which drops the single
# guard character and the comma.
#
# str_extract() (first match per element) is used instead of
# unlist(str_extract_all()) so the result always has exactly one entry per
# name -- NA when nothing matches -- and therefore lines up row-by-row with
# first_name in the data frame below.
last_name <- str_extract(name, "[^[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,}, ")
last_name <- str_extract(last_name, "[[:alpha:]]{2,}")
data.frame(first_name, last_name)
## first_name last_name
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# Logical vector: TRUE for names that carry a title, i.e. two or more letters
# immediately followed by a period ("Rev.", "Dr.").
# The pattern is applied to `name` directly. Feeding the extracted matches
# back in as a vector of patterns only worked through accidental recycling
# (2 patterns against 6 names), errors under stringr's current recycling
# rules, and re-interpreted the "." as a wildcard.
str_detect(name, "[[:alpha:]]{2,}\\.")
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
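I am assuming that “second name” means a middle name, which appears in the data only as an abbreviated initial (a single capital letter followed by a period).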
# Logical vector: TRUE for names that contain an abbreviated middle name,
# i.e. a single capital letter immediately followed by a period ("C.").
# The pattern is applied to `name` directly; routing the extracted match back
# in as the pattern only worked because exactly one match existed, and it
# turned the "." into a wildcard.
str_detect(name, "[A-Z]\\.")
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# Sample strings; some contain a run of digits directly before a "$".
example1 <- c(
  "abcdefh$",
  "12456$45",
  "ads$afsa",
  "1234$abcd34$"
)
# Collect every digit run that is immediately followed by a literal dollar
# sign, flattening the per-string matches into one character vector.
unlist(str_extract_all(string = example1, pattern = "[0-9]+\\$"))
## [1] "12456$" "1234$" "34$"
The pattern matches a string of one or more digits ([0-9]) immediately followed by a literal “$” sign.
# Sample strings: short lowercase words mixed with longer or non-letter tokens.
example2 <- c(
  "tiny",
  "adasgvaf123",
  "aafdafa$",
  "also",
  "two"
)
# Collect whole words (bounded by \b on both sides) made of one to four
# lowercase letters, flattened into one character vector.
unlist(str_extract_all(string = example2, pattern = "\\b[a-z]{1,4}\\b"))
## [1] "tiny" "also" "two"
The pattern above matches whole words, delimited by word boundaries on both sides, that consist of one to four consecutive lowercase letters ([a-z]). Longer words, and words with digits attached, are not matched.
# Sample strings; some end in the ".txt" suffix.
example3 <- c(
  "tiny",
  "ravi..txt",
  "123.txt",
  "234txt",
  "345.txt"
)
# Collect the strings that end in a literal ".txt" (the dot is escaped and the
# match is anchored to the end), flattened into one character vector.
unlist(str_extract_all(string = example3, pattern = ".*?\\.txt$"))
## [1] "ravi..txt" "123.txt" "345.txt"
The pattern above matches any string that ends with the literal suffix “.txt”; a string such as “234txt”, which lacks the dot, is not matched.
# Sample strings; some contain slash-separated groups of digits.
example4 <- c(
  "tiny",
  "123445",
  "133/45/12333",
  "044/333/1984",
  "34/"
)
# Collect every "two digits / two digits / four digits" sequence, flattened
# into one character vector. The pattern is unanchored, so it can match
# inside a longer run of digits.
unlist(str_extract_all(string = example4, pattern = "\\d{2}/\\d{2}/\\d{4}"))
## [1] "33/45/1233"
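The pattern above matches two digits immediately before a “/”, then exactly two digits followed by another “/”, and then the first four digits after the second “/” (for example, a date written as dd/mm/yyyy). Because the pattern is not anchored, it can also match inside a longer run of digits, as in “133/45/12333”.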
# Sample strings; only the first is wrapped in a matching pair of tags.
example5 <- c(
  "<html>Text</html>",
  "123445",
  "133/45/12333",
  "044/333/1984",
  "34/"
)
# Collect every "<tag>...</tag>" span whose closing tag repeats the opening
# tag's name (enforced by the \1 backreference), flattened into one vector.
unlist(str_extract_all(string = example5, pattern = "<(.+?)>.+?</\\1>"))
## [1] "<html>Text</html>"
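The pattern above matches an HTML-style element: an opening tag such as “<html>”, at least one character of content, and a closing tag. The closing tag must consist of a “/” followed by the same tag name as the opening tag, which is enforced by the backreference \1.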
clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# Scrambled text spread over four lines; the capitals and punctuation marks
# in it spell out the hidden message.
secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the upper-case letters and punctuation characters, in order.
# secret is a single string, so the first list element holds every match.
message <- str_extract_all(secret, "[[:upper:][[:punct:]]]")[[1]]
# Join the characters into one string and turn the periods into spaces so the
# words are separated, then print the result.
print(str_replace_all(paste0(message, collapse = ""), "[.]", " "))
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"
```