raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555
-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson,
Homer5553642Dr. Julius Hibbert"
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson," "Homer"
## [7] "Dr. Julius Hibbert"
names_new <- str_replace(name, pattern = "Rev. |Dr. |Mr. |Mrs. |Ms ", replacement = ""); #replacing the titles
firstname = sapply(strsplit(names_new, ' '), function(x) x[1])
lastname = sapply(strsplit(names_new, ' '), function(x) x[length(x)])
full_names <- cbind(firstname,lastname)
full_names## firstname lastname
## [1,] "Moe" "Szyslak"
## [2,] "Burns," "Montgomery"
## [3,] "Timothy" "Lovejoy"
## [4,] "Ned" "Flanders"
## [5,] "Simpson," "Simpson,"
## [6,] "Homer" "Homer"
## [7,] "Julius" "Hibbert"
title <- str_detect(name, "^.{3}\\.|^.{2}\\.")
check_title <- cbind(name, title = title)
check_title## name title
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "FALSE"
## [3,] "Rev. Timothy Lovejoy" "TRUE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson," "FALSE"
## [6,] "Homer" "FALSE"
## [7,] "Dr. Julius Hibbert" "TRUE"
second_name <- ifelse(str_detect(name,"[A-Z]\\.") == TRUE, "YES", "NO")
mid_name <- cbind(name,second_name)
mid_name## name second_name
## [1,] "Moe Szyslak" "NO"
## [2,] "Burns, C. Montgomery" "YES"
## [3,] "Rev. Timothy Lovejoy" "NO"
## [4,] "Ned Flanders" "NO"
## [5,] "Simpson," "NO"
## [6,] "Homer" "NO"
## [7,] "Dr. Julius Hibbert" "NO"
This expression matches any digit or digits between 0 to 9 ending with $ sign here is the example
data1 <- c("Example for the above is expression is 0$","46758$","65472999")
unlist(str_extract_all(data1, "[0-9]+\\$"))## [1] "0$" "46758$"
This expression matches 1 to 4 character lower case words in input string
data2 <- c("Example for the above is expression is one four","1","Two","one")
unlist(str_extract_all(data2, "\\b[a-z]{1,4}\\b"))## [1] "for" "the" "is" "is" "one" "four" "one"
This expression matches any string ending with .txt including the string that equals .txt. Can be used to find text files in a file listing.
data2 <- c("Example for the above is expression is one four.txt","1","Two","one.txt")
unlist(str_extract_all(data2, ".*?\\.txt$"))## [1] "Example for the above is expression is one four.txt"
## [2] "one.txt"
This expression matches 2 digits followed by a / followed by another 2 digits followed by a / and finally followed by four digits We can say date format
data2 <- c("18/07/1984","1","Two","one.txt")
unlist(str_extract_all(data2, "\\d{2}/\\d{2}/\\d{4}"))## [1] "18/07/1984"
This expression matches a pair of openning and closing tags - one or more characters enclosed with < > brackets /tags .
data2 <- c("18/07/1984","1","Two","one.txt","<a>Harpreet<a>test</a></a>")
unlist(str_extract_all(data2, "<(.+?)>.+?</\\1>"))## [1] "<a>Harpreet<a>test</a>"
secret_message <- c("clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr")
paste(unlist(str_extract_all(secret_message, "[:upper:]|[:punct:]")))## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"