# Raw names in mixed layouts: some carry a title ("Rev.", "Dr."), some are
# written as "Last, First", and one contains a middle initial ("C.").
name <- c(
  "Moe Szyslak", "Burns, C. Montgomery", "Rev. Timothy Lovejoy",
  "Ned Flanders", "Simpson, Homer", "Dr. Julius Hibbert"
)
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
library(stringr)

# Remove every letters-plus-period token (titles such as "Rev." and initials
# such as "C."), then trim the whitespace that removal leaves at the edges.
firstname <- str_trim(
  str_replace_all(name, pattern = "[[:alpha:]]+\\.", replacement = "")
)

# A comma marks the "Last, First" layout, in which the leading word is the
# last name and the trailing word is the first name.
is_reversed <- str_detect(firstname, ",")
leading_word <- str_extract(firstname, "^[[:alpha:]]+")
trailing_word <- str_extract(firstname, "[[:alpha:]]+$")

# Pick the first name from whichever end of the string actually holds it.
first_name1 <- ifelse(is_reversed, trailing_word, leading_word)
first_name1
## [1] "Moe" "Montgomery" "Timothy" "Ned" "Homer"
## [6] "Julius"
# The last name is the word at the opposite end from the first name.
last_name <- ifelse(is_reversed, leading_word, trailing_word)
last_name
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
# One row per person, in the same order as `name`.
dataFrame <- data.frame(first_name = first_name1, last_name = last_name)
dataFrame
## first_name last_name
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# A title is a run of letters closed by a period at the very start of a name;
# names without one yield NA.
title <- str_extract(string = name, pattern = "^[[:alpha:]]+\\.")
title
## [1] NA NA "Rev." NA NA "Dr."
# Logical flag per name: TRUE when the name begins with such a title.
title_vector <- str_detect(string = name, pattern = "^[[:alpha:]]+\\.")
title_vector
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# A second name is an abbreviated word (letters plus a period) preceded by a
# space, i.e. one that is not at the start of the string; the match keeps the
# leading space.
second_name <- str_extract(string = name, pattern = "\\ [[:alpha:]]+\\.")
second_name
## [1] NA " C." NA NA NA NA
# Logical flag per name: TRUE when the name contains such a second name.
second_name_vector <- str_detect(string = name, pattern = "\\ [[:alpha:]]+\\.")
second_name_vector
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
This pattern returns every run of one or more digits (0–9) that is immediately followed by a ‘$’ sign.
# Sample input: digit runs and letters separated by dollar signs.
t <- "ab2654$895$zyw$123$"
# Extract each digit run together with the "$" that follows it.
unlist(
  str_extract_all(string = t, pattern = "[0-9]+\\$")
)
## [1] "2654$" "895$" "123$"
This pattern returns every word made of one to four lowercase letters (“a” to “z”) that starts and ends at a word edge.
# Sample sentence; "A" is skipped because the pattern accepts lowercase only.
q <- "A spacious work place for intelligent brain"
# Extract whole words that are one to four lowercase letters long.
unlist(
  str_extract_all(string = q, pattern = "\\b[a-z]{1,4}\\b")
)
## [1] "work" "for"
This pattern returns any string that ends with the “.txt” extension.
# Sample file name ending in ".txt".
z <- "A_spacious_work_place_for_intelligent_brain.txt"
# Match everything up to and including a ".txt" suffix at the end of the string.
unlist(
  str_extract_all(string = z, pattern = ".*?\\.txt$")
)
## [1] "A_spacious_work_place_for_intelligent_brain.txt"
This pattern is suitable for extracting a date written as two digits, two digits and four digits separated by slashes (for example, mm/dd/yyyy).
# Sample date in two-digit/two-digit/four-digit form.
r <- "12/23/2017"
# Extract dates shaped like dd/dd/dddd, where each d is a digit.
unlist(
  str_extract_all(string = r, pattern = "\\d{2}/\\d{2}/\\d{4}")
)
## [1] "12/23/2017"
This pattern extracts an element enclosed in matching tags: “<(.+?)>” captures the characters between two angle brackets, “.+?” matches whatever comes in between, and the backreference “\1” requires the same captured characters to occur again inside the closing tag.
# Sample markup: "<!hello>" has no matching closing tag, "<html>" does.
d <- "<!hello><html>world</html>"
# Extract an opening tag, its content, and the closing tag with the same name;
# "\\1" refers back to the tag name captured by "(.+?)".
unlist(
  str_extract_all(string = d, pattern = "<(.+?)>.+?</\\1>")
)
## [1] "<html>world</html>"
# Scrambled text that hides a message among lowercase letters and digits.
code <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Collect every run of consecutive uppercase letters, in order of appearance.
decode <- str_extract_all(string = code, pattern = "[[:upper:]]+")[[1]]
decode
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "AT" "I" "O" "N" "S"
## [15] "Y" "O" "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E"
## [29] "R" "D"
Using the hint mentioned, we looked at the uppercase letters to find a sensible message in the code. Read one after another, the uppercase letters do make sense, but they still need to be combined into a single string.
# Keep uppercase letters and periods, then glue the pieces into one string;
# the periods mark the word boundaries of the hidden message.
decode <- paste(
  unlist(str_extract_all(string = code, pattern = "[[:upper:].]{1,}")),
  collapse = ""
)
# Turn each period into a space to obtain the readable sentence.
decode <- str_replace_all(string = decode, pattern = "[.]", replacement = " ")
decode
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"