require(stringr)
## Loading required package: stringr
# Raw phone-book text: names and phone numbers are run together with no
# delimiter between entries.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas, or spaces.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))

# Split each name on a literal "Rev." or "Dr." title. The dots are escaped so
# they match a period only; an unescaped "." would match any character.
title_split <- str_trim(unlist(str_split(name, "Rev\\.|Dr\\.")))

# Splitting a name that starts with a title leaves an empty leading piece;
# drop those so that only the names themselves remain.
name_without_title <- title_split[title_split != ""]
# Reorder a "Last, First" name into "First Last".
#
# x: a single character string.
#
# Returns the two comma-separated parts of x, trimmed and swapped, joined by a
# single space. Returns NA_character_ when x does not split into exactly two
# parts on "," (for example, a name that contains no comma).
fn_reverse_name <- function(x) {
  temp <- unlist(str_split(x, ","))
  # Compare the number of parts with 2. The earlier `length(temp == 2)` took
  # the length of the comparison vector, which is non-zero for any input, so
  # the check never rejected anything.
  if (length(temp) == 2) {
    str_c(str_trim(temp[2]), str_trim(temp[1]), sep = " ")
  } else {
    NA_character_
  }
}
# Reverse every entry; lapply + unlist yields a plain character vector
# whether the helper reports a non-reversible name as NA or as NULL.
name_reversed <- unlist(lapply(name_without_title, fn_reverse_name))

# Names without a comma are already in "First Last" order; append the
# successfully reversed ones after them.
combined_result <- c(
  name_without_title[!str_detect(name_without_title, ",")],
  name_reversed[!is.na(name_reversed)]
)

# Remove the middle initial "C. ". The dot is escaped so that only a literal
# period matches (unescaped, "C. " would also match e.g. "Cx ").
result <- str_replace(combined_result, pattern = "C\\. ", replacement = "")
print(result)
## [1] "Moe Szyslak" "Timothy Lovejoy" "Ned Flanders"
## [4] "Julius Hibbert" "Montgomery Burns" "Homer Simpson"
# Flag the names that start with a title. The dots are escaped so they match
# a literal period; unescaped, "^Dr." would also match a name such as "Drew".
str_detect(name, "^Rev\\.|^Dr\\.")
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
Only "Burns, C. Montgomery" has a second name.
# Strip a literal "Rev." or "Dr." title (dots escaped so they match a period
# only), then count the words left in each name.
name_without_title <- str_replace(name, pattern = "Rev\\.|Dr\\.", replacement = "")
name_count <- str_count(name_without_title, "\\w+")

# A name with three words has a second name. Compare numerically: matching the
# count as text against "3" would also be TRUE for counts such as 13 or 30.
name_count == 3
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
Matches one or more digits (0-9) immediately followed by a literal dollar sign ($).
# Extract each run of one or more digits that is followed by a literal "$".
unlist(
  str_extract_all(
    string = c("1234$", "543"),
    pattern = "[0-9]+\\$"
  )
)
## [1] "1234$"
\\b matches a word boundary (the start or end of a word), so the pattern matches whole words made up of 1 to 4 lowercase letters (a-z).
# Extract whole words (bounded by \b on both sides) of 1 to 4 lowercase letters.
unlist(
  str_extract_all(
    string = c("asdf", "aa", "a", "abcde"),
    pattern = "\\b[a-z]{1,4}\\b"
  )
)
## [1] "asdf" "aa" "a"
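.* matches zero or more of any character; the trailing ? makes that quantifier lazy (it matches as few characters as possible) rather than optional. The string must then end in the literal suffix .txt.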
# Extract matches made of any (lazily matched) characters followed by a
# literal ".txt" at the very end of the string.
unlist(
  str_extract_all(
    string = c("asdf.txt", ".txt", "~.txt", "tt.txts"),
    pattern = ".*?\\.txt$"
  )
)
## [1] "asdf.txt" ".txt" "~.txt"
Matches a date-like string: two digits, a slash, two digits, a slash, and four digits (e.g. dd/mm/yyyy or mm/dd/yyyy).
# Extract substrings shaped like two digits / two digits / four digits.
unlist(
  str_extract_all(
    string = c("11/11/1234", "11/11/123"),
    pattern = "\\d{2}/\\d{2}/\\d{4}"
  )
)
## [1] "11/11/1234"
Matches well-formed HTML elements, i.e. an opening tag and its matching closing tag, with non-empty content in between.
# Extract an opening tag, at least one character of content, and the closing
# tag whose name repeats the captured opening tag name (backreference \1).
unlist(
  str_extract_all(
    string = c("<head>abc</head>", "<body></body>"),
    pattern = "<(.+?)>.+?</\\1>"
  )
)
## [1] "<head>abc</head>"
# Scrambled text hiding a message in its upper-case letters. The literal spans
# several lines, so the string contains embedded newline characters.
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Pull out every upper-case letter and join them, with no separator, into a
# single string.
paste0(
  unlist(str_extract_all(secret_message, "[[:upper:]]")),
  collapse = ""
)
## [1] "CONGRATULATIONSYOUAREASUPERNERD"