# Raw phone-book text: six phone numbers and names run together without any
# separator. The literal is written one entry per line and glued back together.
raw.data <- paste0(
  "555-1239Moe Szyslak",
  "(636) 555-0113Burns, C. Montgomery",
  "555-6542Rev. Timothy Lovejoy",
  "555 8904Ned Flanders",
  "636-555-3226Simpson, Homer",
  "5553642Dr. Julius Hibbert"
)
raw.data
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# Rearrange the vector using the tools in Chapter 11 so that all the elements
# conform to the standard first_name last_name.
# First step: extract every run of two or more letters, periods, commas, or
# spaces. raw.data is a single string, so the one list element returned by
# str_extract_all() holds all the names.
name <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Normalise every name to "first_name last_name".
# Inner sub(): remove the first "<word>. " token (a title such as "Rev." or an
# initial such as "C."). The replacement is the empty string; replacing the
# token with a space would leave a leading blank on names that start with a
# title.
# Outer sub(): turn "last, first" into "first last" by swapping the two
# captured words around the comma.
first_last <- sub(
  "([[:alnum:]_]+), *([[:alnum:]_]+)",
  "\\2 \\1",
  sub("[[:alnum:]_]+\\. ", "", name)
)
first_last
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
# Logical vector flagging the names that start with a title, i.e. whose first
# token is a word immediately followed by a period (as in "Rev." or "Dr.").
title_vector <- str_detect(
  string = name,
  pattern = "(^[[:alnum:]_]{1,}\\.).*"
)
# Pair each name with its flag for display.
name_title <- data.frame(name = name, title_vector = title_vector)
name_title
## name title_vector
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert TRUE
# Logical vector flagging the names that contain a second name, detected as a
# word followed by a period with a space on both sides (as in " C. ").
second_name <- str_detect(
  string = name,
  pattern = " [[:alnum:]_]{1,}\\. "
)
# Pair each name with its flag for display.
is_second <- data.frame(name = name, second_name = second_name)
is_second
## name second_name
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery TRUE
## 3 Rev. Timothy Lovejoy FALSE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert FALSE
Describe the type of strings each regular expression matches and construct an example
(1) [0-9]+\\$
One or more digits (0 to 9) followed by a literal dollar sign
# Example: the trailing digits and the literal "$" are extracted; the leading
# letters are not part of the match.
str_extract(
  string = "ewqee3121$",
  pattern = "[0-9]+\\$"
)
## [1] "3121$"
Whole words consisting of one to four lowercase letters
# Sample inputs shared by the regular-expression examples.
strings <- c(
  "abcde", "abcd", "ab", "1234", "abc", "ABcd",
  "12/03/2010", "10/27/1982",
  "text.txt", "data.csv", "essay.docx",
  "<header>My Web Site</header>"
)
# Word boundaries on both sides restrict the matches to standalone runs of one
# to four lowercase letters.
unlist(
  str_extract_all(string = strings, pattern = "\\b[a-z]{1,4}\\b")
)
## [1] "abcd" "ab" "abc" "text" "txt" "data" "csv" "docx"
Strings ending with .txt
# Keep only the elements that end in a literal ".txt"; the "$" anchors the
# extension to the end of the string.
unlist(
  str_extract_all(string = strings, pattern = ".*?\\.txt$")
)
## [1] "text.txt"
Strings that contain digits in the format xx/xx/xxxx (two digits, two digits, four digits)
# Extract date-like substrings: two digits, a slash, two digits, a slash, and
# four digits.
unlist(
  str_extract_all(string = strings, pattern = "\\d{2}/\\d{2}/\\d{4}")
)
## [1] "12/03/2010" "10/27/1982"
Strings that contain matching opening and closing tags, as in HTML. Example:
# Extract an opening tag, its content, and the closing tag; the backreference
# \\1 forces the closing tag name to equal the captured opening tag name.
unlist(
  str_extract_all(string = strings, pattern = "<(.+?)>.+?</\\1>")
)
## [1] "<header>My Web Site</header>"
# Scrambled text that hides a message among letters, digits, and punctuation.
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Split the message into three character streams. Each pattern is a single
# character class that lists a range together with "." and "?", so the periods
# in the message are kept in every stream (see the recorded outputs).
# secret_message is one string, so element [[1]] holds all of its matches.
uppercase <- paste0(
  str_extract_all(string = secret_message, pattern = "[[A-Z].?]")[[1]],
  collapse = ""
)
lowercase <- paste0(
  str_extract_all(string = secret_message, pattern = "[[a-z].?]")[[1]],
  collapse = ""
)
numbers <- paste0(
  str_extract_all(string = secret_message, pattern = "[[0-9].?]")[[1]],
  collapse = ""
)
uppercase
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD"
Taken together, the uppercase letters form a meaningful statement.
# Show the lowercase stream; it includes the periods from the message.
lowercase
## [1] "clcopowzmstcdwnkigvdicpuggvhrynjuwczihqrfpxsjdwpnanwowisdijjkpfdrcocbtyczjataootjtjnecfek.rwwwojigdvrfrbz.bknbhzgvizcrop.wgnb.qofaotfbwmktszqefyndtkcfgmcgxonhkgr"
# Show the digit stream; it includes the periods from the message as well.
numbers
## [1] "1087792855078035307553364.1162.24905..651724639589659490545"
Decoded secret message
# Turn the periods that separate the words of the uppercase stream into spaces
# to obtain the readable message.
decoded_message <- str_replace_all(
  string = uppercase,
  pattern = "[.]",
  replacement = " "
)
decoded_message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"