# stringr supplies str_extract_all() and the other str_* helpers used in this
# script; attaching it here keeps the script runnable on its own.
library(stringr)

# Raw phone-book text: names and phone numbers run together with no delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Page 206, "Automated Data Collection with R".
# A name is any run of two or more letters, dots, commas or spaces; the
# {2,} quantifier skips the single spaces that occur inside phone numbers.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))

# First name, pass 1: keep either
#   * the trailing word when it is preceded by punctuation and a space
#     ("Simpson, Homer", "Burns, C. Montgomery"), or
#   * a word of two or more letters that is followed by a space
#     ("Moe Szyslak", "Rev. Timothy Lovejoy").
# Pass 2 strips the surrounding punctuation and spaces from the match.
firstName <- unlist(
  str_extract_all(name, "[[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,} ")
)
firstName <- unlist(str_extract_all(firstName, "[[:alpha:]]{2,}"))

# Last name, pass 1: keep either
#   * the trailing word when the character before the space is not
#     punctuation ("Moe Szyslak"), or
#   * a word followed by a comma and a space ("Simpson, Homer").
# Pass 2 keeps only the run of two or more letters, which also drops the
# single leading character captured by the first alternative.
lastName <- unlist(
  str_extract_all(name, "[^[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,}, ")
)
lastName <- unlist(str_extract_all(lastName, "[[:alpha:]]{2,}"))

# Titles: two or more letters followed by a literal dot, so the
# single-letter initial "C." is not picked up. Only names that carry a title
# contribute an element, so this vector is shorter than `name`.
title <- unlist(str_extract_all(name, "[[:alpha:]]{2,}\\."))
print(title)
## [1] "Rev." "Dr."
# Construct a logical vector indicating whether a character has a title.
# A title is two or more letters followed by a literal dot. The pattern is
# applied to every name directly instead of reusing the shorter `title`
# vector: `title` only has one entry per titled person, so combining it with
# the six names relied on silent recycling (and recent stringr versions
# refuse to recycle a length-2 pattern against six strings).
title_pattern <- "[[:alpha:]]{2,}\\."

# One row per person; Title is NA for people without a title.
# NOTE(review): the column is spelled "FistName" (sic). It is left unchanged
# so that any existing reference to that column keeps working.
firstLastdf <- data.frame(
  "FistName" = firstName,
  "LastName" = lastName,
  "Title" = str_extract(name, title_pattern)
)

# TRUE where the name contains a title.
hasTitledf <- data.frame(
  "Name" = name,
  "HasTitle" = str_detect(name, title_pattern)
)

# Element-wise check that each extracted last name occurs in its own name.
# `lastName` was itself extracted from `name`, so this is TRUE for every row
# where the extraction succeeded.
hasLastNamedf <- data.frame(
  "Name" = name,
  "HasLastName" = str_detect(name, lastName)
)
print(hasTitledf)
## Name HasTitle
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert TRUE
print(hasLastNamedf)
## Name HasLastName
## 1 Moe Szyslak TRUE
## 2 Burns, C. Montgomery TRUE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders TRUE
## 5 Simpson, Homer TRUE
## 6 Dr. Julius Hibbert TRUE
# Construct a logical vector indicating whether a character has a second name.
#
# Notes:
#   \      is an escape character
#   \d     is digits
#   \b     is word edge
#   \>     is word end
#   \<     is word beginning
#   ?      the preceding item is optional and will be matched at most once
#   *      the preceding item is matched zero or more times
#   +      the preceding item is matched one or more times
#   {n}    the preceding item is matched exactly n times
#   {n,m}  the preceding item is matched at least n times, but not more than m times
# Each exercise stores a pattern in `re` and checks it against one sample
# string; the expected result is shown in the `##` line that follows.

# One or more digits followed by a literal dollar sign.
re <- "[0-9]+\\$"
str_detect(string = "13443$", pattern = re)
## [1] TRUE

# A run of one to four lowercase letters delimited by word boundaries.
re <- "\\b[a-z]{1,4}\\b"
str_detect(string = "$abcd$", pattern = re)
## [1] TRUE

# Any (lazily matched) prefix followed by ".txt" at the end of the string.
re <- ".*?\\.txt$"
str_detect(string = "$abcd.txt", pattern = re)
## [1] TRUE

# Two digits, a slash, two digits, a slash, four digits.
re <- "\\d{2}/\\d{2}/\\d{4}"
str_detect(string = "10/12/1996", pattern = re)
## [1] TRUE

# An opening tag, some content, and a closing tag whose name must equal the
# opening tag's name (backreference \\1).
re <- "<(.+?)>.+?</\\1>"
str_detect(string = "<div>w3skewls</div>", pattern = re)
## [1] TRUE
# Hidden-message puzzle: the message is spelled by the capital letters, with
# dots standing in for the blanks between words.
secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Pull out every run of capitals and dots, glue the runs into one string,
# then turn each dot into a space. Note that the result is stored in a
# variable called `message`, the same name as the base R function.
message <- str_replace_all(
  paste(unlist(str_extract_all(secret, "[[:upper:].]{1,}")), collapse = ""),
  "[.]",
  " "
)
message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"