#1. Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.
library(stringr)
# Unstructured phone-book text: phone numbers and names run together with no
# separator between the entries.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
raw.data
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas or blanks. Digits,
# hyphens and parentheses are not in the character class, so they end a run;
# the {2,} quantifier keeps a lone blank inside a phone number from matching.
flname <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
flname
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Reorder "last_name, first_name" entries to "first_name last_name" in one
# vectorised step. The whitespace after the comma is consumed by the pattern,
# so the reordered name does not start with a stray blank (splitting on ","
# and pasting the pieces back yields " C. Montgomery Burns"). Entries without
# a comma do not match and are returned unchanged; an empty vector is handled
# too, which a 1:length(x) loop is not.
flname <- str_trim(str_replace(flname, "^([^,]+),\\s*(.+)$", "\\2 \\1"))
data.frame(flname)
## flname
## 1 Moe Szyslak
## 2 C. Montgomery Burns
## 3 Rev. Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Dr. Julius Hibbert
# remove initials
# A single letter followed by a period is treated as an abbreviated name. It
# may sit at the start of the string or after a blank, so both positions are
# accepted and whatever preceded the initial is put back. [[:alpha:]] is used
# instead of [A-z] because that range also matches the punctuation characters
# that lie between "Z" and "a".
flnamewt <- str_replace(flname, "(^|\\s)[[:alpha:]]\\.\\s+", "\\1")
data.frame(flnamewt)
## flnamewt
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Rev. Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Dr. Julius Hibbert
# I was not 100% sure whether the requirement was to remove the title as well,
# so I created two vectors: one with titles (flnamewt) and one without (flname).
# Names with Title
data.frame(flnamewt)
## flnamewt
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Rev. Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Dr. Julius Hibbert
# Remove Titles
# A title is two or three letters followed by a period at the very start of
# the name ("Dr.", "Rev."). Anchoring with ^ keeps the pattern from removing
# an abbreviation that appears later in the string.
flname <- str_replace(flname, "^[[:alpha:]]{2,3}\\.\\s+", "")
# Names without Title
data.frame(flname)
## flname
## 1 Moe Szyslak
## 2 C. Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
#2. Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.)
# Uses the same "two or three letters plus period at the start" rule as the
# title removal above, applied to the vector that still carries the titles.
title_v <- str_detect(flnamewt, "^[[:alpha:]]{2,3}\\.\\s")
dft <- data.frame(flnamewt, title_v)
dft
## flnamewt title_v
## 1 Moe Szyslak FALSE
## 2 Montgomery Burns FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Homer Simpson FALSE
## 6 Dr. Julius Hibbert TRUE
#3. Construct a logical vector indicating whether a character has a second name.
# A second name is recognised by an abbreviated name (single letter + period)
# in the title-free vector, either at the start of the string or after a blank.
secnm_v <- str_detect(flname, "(^|\\s)[[:alpha:]]\\.\\s")
dfs <- data.frame(flname, secnm_v)
dfs
## flname secnm_v
## 1 Moe Szyslak FALSE
## 2 C. Montgomery Burns TRUE
## 3 Timothy Lovejoy FALSE
## 4 Ned Flanders FALSE
## 5 Homer Simpson FALSE
## 6 Julius Hibbert FALSE
#4. Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
#4.1 [0-9]+\\$
# Matches one or more digits immediately followed by a literal dollar sign,
# anywhere in the string (the backslashes escape the end-of-line anchor).
pattern <- "[0-9]+\\$"
sample <- c("123$23", "abc123$", "XYZ$")
str_detect(string = sample, pattern = pattern)
## [1] TRUE TRUE FALSE
#4.2 \\b[a-z]{1,4}\\b
# Matches a complete word made of one to four lowercase letters: the word
# boundaries on both sides rule out longer words and letters glued to digits.
pattern <- "\\b[a-z]{1,4}\\b"
sample <- c("ABC2", "ja24", "cuny")
str_detect(string = sample, pattern = pattern)
## [1] FALSE FALSE TRUE
#4.3 .*?\\.txt$
# Matches any string that ends in ".txt"; the lazy prefix .*? may be empty,
# so the bare string ".txt" matches as well.
pattern <- ".*?\\.txt$"
sample <- c(".txt", "Test.dat", "123.xml", "a$b#1.txt")
str_detect(string = sample, pattern = pattern)
## [1] TRUE FALSE FALSE TRUE
#4.4 \\d{2}/\\d{2}/\\d{4}
# Matches two digits, a slash, two digits, a slash and four digits, i.e. a
# date-like token such as 02/16/2018; a two-digit year does not match.
pattern <- "\\d{2}/\\d{2}/\\d{4}"
sample <- c("02/16/2018", "02/16/20")
str_detect(string = sample, pattern = pattern)
## [1] TRUE FALSE
#4.5 <(.+?)>.+?</\\1>
# Matches an opening tag <name>, at least one character of content, and a
# closing tag </name>; the backreference \\1 forces the closing tag to repeat
# the name captured in the opening tag, as in an XML/HTML element.
pattern <- "<(.+?)>.+?</\\1>"
sample <- c("<tag>Text</tag>", "<tag>123<tag>")
str_detect(string = sample, pattern = pattern)
## [1] TRUE FALSE
#5. The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacollection.com.
# Scrambled text; the literal spans several lines, so it contains newlines.
code <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the upper-case letters and the periods; everything else
# (lower-case letters, digits, newlines, other punctuation) is dropped.
codex <- str_extract_all(string = code, pattern = "[[:upper:].]")[[1]]
codex
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D"
# Glue the characters into one string and turn the periods into blanks so the
# words are separated.
concat <- str_replace_all(
  string = paste0(codex, collapse = ""),
  pattern = "[.]",
  replacement = " "
)
concat
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"