3. Copy the introductory example. The vector `name` stores the extracted names.
library(stringr)

# Source text: character names run together with their phone numbers.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Every run of two or more letters, periods, commas or spaces is taken as one
# name; digits, hyphens and parentheses act as separators.
name <- unlist(
  str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")
)

# Remove the first " X." initial or "Xxx. " title found in each entry, then
# trim any whitespace left at the ends.
FL_names <- str_trim(
  sub(pattern = "\\s\\w\\.|\\S\\w+\\.\\s", replacement = "", x = name)
)
FL_names
## [1] "Moe Szyslak" "Burns, Montgomery" "Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Julius Hibbert"
# Reorder "Last, First" entries into "First Last"; entries without a comma do
# not match the pattern and are returned as they are.
FL_names1 <- sub(
  pattern = "(\\w+),\\s+(\\w+)",
  replacement = "\\2 \\1",
  x = FL_names
)
FL_names1
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
# Flag entries that carry a title. The periods are escaped so that only the
# literal abbreviations "Rev." and "Dr." match; an unescaped "." is a regex
# wildcard and would also accept strings such as "Drew" or "Reva".
titled_name <- str_detect(name, "Rev\\.|Dr\\.")
titled_name
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
cbind(name, titled_name)
## name titled_name
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "FALSE"
## [3,] "Rev. Timothy Lovejoy" "TRUE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "TRUE"
3. Construct a logical vector indicating whether a character has a second name.
# Flag entries that contain a middle initial: a single capital letter followed
# by a literal period. The leading \\b requires the capital to start a token,
# so the last capital of a longer abbreviation (e.g. the "D." in "PhD.") is
# not mistaken for an initial. "Rev." and "Dr." end in a lowercase letter and
# therefore never match.
second_name <- str_detect(name, "\\b[A-Z]\\.")
second_name
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
cbind(name, second_name)
## name second_name
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "TRUE"
## [3,] "Rev. Timothy Lovejoy" "FALSE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "FALSE"
# Sample strings for the pattern [0-9]+\$ (a run of digits ending in a
# literal dollar sign).
test <- c("12308129038asdas09$", "hello#", "233124$1", "1$")
number_vec <- unlist(
  str_extract_all(string = test, pattern = "[0-9]+\\$")
)
number_vec
## [1] "09$" "233124$" "1$"
Matches one or more digits (0-9) immediately followed by a literal $ sign.
# Sample text for the pattern \b[a-z]{1,4}\b (a whole word of one to four
# lowercase letters).
test1 <- "badb, abcedf, eheheh, badasdasdasd, abcd"
number_vec1 <- unlist(
  str_extract_all(string = test1, pattern = "\\b[a-z]{1,4}\\b")
)
number_vec1
## [1] "badb" "abcd"
Matches whole words that consist of one to four lowercase letters only.
c) .*?\.txt$
# Sample file names for the pattern .*?\.txt$ (anything that ends in ".txt").
test2 <- c("homework.gif", "homework.jpg", "homework2.txt")
number_vec2 <- unlist(
  str_extract_all(string = test2, pattern = ".*?\\.txt$")
)
number_vec2
## [1] "homework2.txt"
Matches strings that end with .txt.
d) \d{2}/\d{2}/\d{4}
# Sample strings for the pattern \d{2}/\d{2}/\d{4} (two digits, slash, two
# digits, slash, four digits).
test3 <- c("07/17/1990", "ab/12/a123", "0909111")
number_vec3 <- unlist(
  str_extract_all(string = test3, pattern = "\\d{2}/\\d{2}/\\d{4}")
)
number_vec3
## [1] "07/17/1990"
Matches strings of the form XX/XX/XXXX, where every X is a digit.
e) <(.+?)>.+?</\1>
# Sample markup for the pattern <(.+?)>.+?</\1>: the backreference \1 forces
# the closing tag to repeat whatever name the opening tag captured.
test4 <- c(
  "<a>HEllo",
  "<a> Hello </a>",
  "<b>My name is</d>",
  "<html>Hi there</html>"
)
number_vec4 <- unlist(
  str_extract_all(string = test4, pattern = "<(.+?)>.+?</\\1>")
)
number_vec4
## [1] "<a> Hello </a>" "<html>Hi there</html>"
Matches an opening tag, then at least one character of content, and then a closing tag. The name in the closing tag must be identical to the name in the opening tag (enforced by the backreference \1).
clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# The hidden message is spelled by the uppercase letters and punctuation marks
# in the string; each matching character is extracted as its own element.
code <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
crack_code <- unlist(
  str_extract_all(string = code, pattern = "[[A-Z][:punct:]]")
)
crack_code
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"