library(stringr)
# Raw text in which person names and phone numbers run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is any run of at least two letters, periods, commas, or spaces.
# Requiring two characters keeps the lone spaces inside the phone numbers
# (e.g. "555 8904") from being reported as names. raw.data is a single string,
# so the one list element returned by str_extract_all() holds every match.
name <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Remove a title: the first run of two or more letters that is directly
# followed by a period ("Rev.", "Dr."). Only the title itself is removed, so
# the space that followed it remains at the start of the string.
noTitle <- str_replace(
  string = name,
  pattern = "[[:alpha:]]{2,}[.]",
  replacement = ""
)
noTitle
## [1] "Moe Szyslak" "Burns, C. Montgomery" " Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" " Julius Hibbert"
# Remove an abbreviated second name: the first single letter that is directly
# followed by a period ("C."). Names without such an initial pass through
# unchanged.
noSecName <- str_replace(
  string = noTitle,
  pattern = "[[:alpha:]]{1}[.]",
  replacement = ""
)
noSecName
## [1] "Moe Szyslak" "Burns, Montgomery" " Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" " Julius Hibbert"
# Split every cleaned name (title and middle initial already removed) into a
# first name and a last name.
#
# First name: either a word followed by whitespace ("First Last" layout) or,
# for the "Last, First" layout, punctuation + whitespace + word.
firstName <- str_extract(
  noSecName,
  "[[:alpha:]]{2,}[[:space:]]{1,}|[[:punct:]][[:space:]]{1,}[[:alpha:]]{2,}"
)
# Drop the ", " separator captured by the "Last, First" alternative.
firstName <- str_replace(firstName, "[[:punct:]][[:space:]]", "")
# The extraction keeps the whitespace that separated the words ("Moe ",
# " Montgomery"). Trim it so the values are clean and paste() below does not
# produce doubled or leading spaces.
firstName <- str_trim(firstName)
firstName
## [1] "Moe" "Montgomery" "Timothy" "Ned" "Homer"
## [6] "Julius"
# Last name: either the final word of "First Last" (captured together with the
# last character of the preceding word and the separating space) or the word
# directly before the punctuation in "Last, First".
lastName <- str_extract(
  noSecName,
  "[^[:punct:]][[:space:]][[:alpha:]]{2,}|[[:alpha:]]{2,}[[:punct:]]"
)
# Strip the captured "letter + space" prefix, then the trailing punctuation.
lastName <- str_replace(lastName, "[[:alpha:]][[:space:]]", "")
lastName <- str_replace(lastName, "[[:punct:]]", "")
lastName
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
# Every name in "first_name last_name" order.
paste(firstName, lastName)
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
data.frame(firstName, lastName)
## firstName lastName
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# Flag each name that carries a title, i.e. contains a run of two or more
# letters directly followed by a period ("Rev.", "Dr."). A single-letter
# initial such as "C." does not qualify.
title <- str_detect(string = name, pattern = "[[:alpha:]]{2,}[.]")
title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# Show the flag next to the name it belongs to.
df1 <- data.frame(name = name, title = title)
df1
## name title
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert TRUE
# Flag each name that has a second (abbreviated) name. The check runs on
# noTitle, where titles are already gone, so any remaining letters followed by
# a period are taken to be a name initial such as "C.".
secName <- str_detect(string = noTitle, pattern = "[[:alpha:]]{1,}[.]")
secName
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# Show the flag next to the original, unmodified name.
df2 <- data.frame(name = name, secName = secName)
df2
## name secName
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery TRUE
## 3 Rev. Timothy Lovejoy FALSE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert FALSE
# "[0-9]+\\$": one or more digits immediately followed by a literal dollar
# sign. The match may sit anywhere in the string, so "16$cb" qualifies while
# "$124" (dollar sign first) and "a2b$" (letter between digit and "$") do not.
sol1 <- str_detect(
  string = c("124$", "$124", "a2b$", "16$cb"),
  pattern = "[0-9]+\\$"
)
sol1
## [1] TRUE FALSE FALSE TRUE
# "\\b[a-z]{1,4}\\b": a whole word made of one to four lowercase letters.
# Longer words ("abcdf"), uppercase words, and digit-only strings yield NA.
sol2 <- str_extract(
  string = c("abcdf", "ghij", "KLMN", "6542", "opqr 23"),
  pattern = "\\b[a-z]{1,4}\\b"
)
sol2
## [1] NA "ghij" NA NA "opqr"
# ".*?\\.txt$": any string that ends in a literal ".txt". The dot is escaped,
# so "ahs.rtxt" (no dot directly before "txt") is rejected, as is "txt.r".
sol3 <- str_extract(
  string = c("ahs.rtxt", "123asd.txt", "txt.r"),
  pattern = ".*?\\.txt$"
)
sol3
## [1] NA "123asd.txt" NA
# "\\d{2}/\\d{2}/\\d{4}": two digits, slash, two digits, slash, four digits.
# Only the shape is checked; letters in any of the three parts give NA.
sol4 <- str_extract(
  string = c("09/24/1979", "mm/dd/yyyy", "23/sept/1979"),
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
sol4
## [1] "09/24/1979" NA NA
# Decode the hidden message: keep only the uppercase letters and periods of
# hMessage, in their original order.
# NOTE(review): hMessage is not defined in the visible part of this script and
# must exist before this line runs - confirm where it is created.
# The result variable shares its name with base R's message() function.
message <- unlist(
  str_extract_all(string = hMessage, pattern = "[[:upper:].]{1,}")
)
message
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "AT" "I" "O" "N" "S"
## [15] "." "Y" "O" "U" "." "A" "R" "E" "." "A" ".S" "U" "P" "E"
## [29] "R" "N" "E" "R" "D"
# Glue the fragments into one string, then turn each period into a space so
# the words are separated.
message <- str_replace_all(
  string = paste(message, collapse = ""),
  pattern = "[.]",
  replacement = " "
)
message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"