library(tidyverse)
# Raw directory text: names interleaved with phone numbers.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Extract every run of letters, commas, periods and whitespace.
chara <- unlist(str_extract_all(raw.data, "[A-Za-z,.\\s]+"))

# Drop the fragments that are only whitespace (e.g. the gap inside
# "(636) 555-0113"). Trimming first also discards runs of several spaces,
# not just a single " ".
name <- str_trim(chara)
name <- name[name != ""]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Title pattern. The period is escaped so it matches only a literal "."
# (an unescaped "." would also accept e.g. "Dry "), and \\b keeps the title
# from matching in the middle of a longer word.
title_pat <- "\\b(Rev|Dr)\\. "

# TRUE for every name that carries a title.
title_logi <- str_detect(name, title_pat)
title_logi
## [1] FALSE FALSE TRUE FALSE FALSE TRUE

# Remove the title from each name.
name_notitle <- str_replace_all(name, title_pat, "")

# Count the spaces left after the title is removed: two or more spaces mean
# the name has a second (middle) name.
sec_name_logi <- str_count(name_notitle, " ") >= 2
sec_name_logi
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# The matched part of the string begins with at least one digit and ends
# with a "$".
# Example: "12345$"
# Test strings for the pattern "one or more digits followed by a literal $".
example1 <- c("1$", "12345$", "12345abc$", "abc12345$", "ab$")
# Show which part of each test string the pattern matches.
str_view(example1, "[0-9]+\\$")

# The matched part of the string contains only 1 to 4 lowercase letters,
# with no digits or any other kind of character.
# Example: "how"
# Test strings for the pattern "a whole word of 1 to 4 lowercase letters".
example2 <- c("how", "ARE", "you?", "?fine?", " datascience")
# \\b on both sides requires a word boundary before and after the letters.
str_view(example2, "\\b[a-z]{1,4}\\b")

# The matched part of the string can begin with anything but must end with
# exactly ".txt".
# Example: "text.txt"
# Test strings for the pattern "anything, then a literal .txt at the end".
example3 <- c("text.txt", "text.txtt", ".txt", "?text.txt?", "\n.txt")
# The $ anchor ties the literal ".txt" to the end of the string.
str_view(example3, ".*?\\.txt$")

# The matched part of the string begins with 2 digits followed by a "/",
# then another pair of digits and a "/", and finally 4 more digits.
# Example: "02/17/2019"
# Test strings for the pattern "dd/dd/dddd".
example4 <- c(
  "02/17/2019", "ab/cd/2019", "023/17/20199", "2/17/19", "AB02/17/2019CD"
)
# The pattern is not anchored, so it can also match inside a longer string.
str_view(example4, "\\d{2}/\\d{2}/\\d{4}")

# The first part of the string begins with "<", followed by at least one
# character, then ">".
# The middle part is also at least one character.
# The last part begins with "<", followed by "/", then repeats exactly the
# characters captured between "<" and ">" in the first part, and ends
# with ">".
# Example: "<img>image</img>"
# Test strings for the pattern "<tag>content</tag>".
example5 <- c(
  "<img>image</img>", "<img>image</>", "<img></img>", "<>image</img>",
  "<i>image</img>"
)
# \\1 refers back to the text captured by (.+?), so the closing tag must
# repeat the opening tag exactly.
str_view(example5, "<(.+?)>.+?</\\1>")

# Scrambled text:
# clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# Scrambled input text.
txt <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Collect every run of capital letters and periods. The input is a single
# string, so the first list element holds all of the matches.
upper_txt <- str_extract_all(txt, "[A-Z.]+")[[1]]

# Join the runs back together with no separator.
upper_txt_ws <- paste(upper_txt, collapse = "")
upper_txt_ws
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD"