suppressMessages(library(stringr))
Copy the introductory example. The vector `name` stores the extracted names (the code below calls it `Extracted.names`). R> name [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy" [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Raw input: names interleaved with phone-number-like digit groups.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of at least two characters drawn from letters, ".", ","
# and " ". str_extract_all() returns a list with one element per input string;
# raw.data is a single string, so the first element holds every match.
Extracted.names <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
Extracted.names
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
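Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name. The steps are combined into a single expression: base R's sub() strips titles and middle initials and then swaps "last, first" into "first last", and stringr's str_trim() removes the leftover whitespace.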
# Normalise every name to "first_name last_name" in one nested expression,
# evaluated from the inside out:
#   1. inner sub(): delete the first run of 1-3 letters/dots that is followed
#      by ". " -- in this data the titles "Rev. " / "Dr. " and the initial "C. ";
#   2. outer sub(): swap the text before and after the comma ("last, first"
#      becomes " first last"); names without a comma are left untouched;
#   3. str_trim(): drop the leading space introduced by the swap.
Std.FullName <- str_trim(
  sub(
    "([^,]+),([^,]+)",
    "\\2 \\1",
    sub("[[:alpha:].]{1,3}\\. ", "", Extracted.names)
  )
)
Std.FullName
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
Construct a logical vector indicating whether a character has a title (e.g., Rev. or Dr.).
# TRUE where a name contains a title: a run of 2-3 letters/dots followed by
# ". " (matches "Rev. " and "Dr. "). A single-letter initial such as "C. " is
# too short to satisfy the {2,3} quantifier, so it is not counted as a title.
Title <- str_detect(
  string = Extracted.names,
  pattern = "[[:alpha:].]{2,3}\\. "
)

# Show names and flags side by side. cbind() builds a character matrix, so
# the logical values are printed as the strings "TRUE" / "FALSE".
cbind(Extracted.names, Title)
## Extracted.names Title
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "FALSE"
## [3,] "Rev. Timothy Lovejoy" "TRUE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "TRUE"
Construct a logical vector indicating whether a character has a second name.
# TRUE where a name contains an abbreviated second name: a space, exactly one
# letter (or dot), then ". " -- e.g. the " C. " in "Burns, C. Montgomery".
MiddleName <- str_detect(
  string = Extracted.names,
  pattern = " [[:alpha:].]{1}\\. "
)

# Show names and flags side by side. cbind() builds a character matrix, so
# the logical values are printed as the strings "TRUE" / "FALSE".
cbind(Extracted.names, MiddleName)
## Extracted.names MiddleName
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "TRUE"
## [3,] "Rev. Timothy Lovejoy" "FALSE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "FALSE"
Describe the types of strings that conform to the following regular expressions, and construct an example that each regular expression matches.
[0-9]+\$
# [0-9]+\$ : one or more digits immediately followed by a literal dollar sign.
# The match may sit anywhere in the string ("40$30" matches), but the "$" must
# directly follow a digit ("$4" and "56 $" do not match).
str4.1 <- c("$4", "$40", "4$", "40$30", "30$", "56 $")
str_detect(string = str4.1, pattern = "[0-9]+\\$")
## [1] FALSE FALSE TRUE TRUE TRUE FALSE
\b[a-z]{1,4}\b
# \b[a-z]{1,4}\b : a stand-alone word of one to four lowercase letters,
# delimited by word boundaries on both sides. One such word anywhere in the
# string is enough (e.g. "four" in the first element, "aim" in the second).
str4.2 <- c("four times four sixteen", "aim high", " t ", "attt")
str_detect(string = str4.2, pattern = "\\b[a-z]{1,4}\\b")
## [1] TRUE TRUE TRUE TRUE
.*?\.txt$
# .*?\.txt$ : any string that ends in the literal suffix ".txt". The lazy
# ".*?" accepts whatever precedes the suffix; "$" anchors it to the end.
str4.3 <- c(
  "File Format.txt",
  "String Manipulation using stringR.txt",
  "books.html",
  "filename.php"
)
str_detect(string = str4.3, pattern = ".*?\\.txt$")
## [1] TRUE TRUE FALSE FALSE
\d{2}/\d{2}/\d{4}
# \d{2}/\d{2}/\d{4} : two digits, "/", two digits, "/", four digits -- a
# date-like shape. Only the layout is checked, not the values, so both
# month/day/year and day/month/year orderings match.
str4.4 <- c("01/22/2019", "22/01/2019")
str_detect(string = str4.4, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] TRUE TRUE
The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacollection.com.
# The secret message is spelled by the uppercase letters and punctuation;
# lowercase letters, digits and the embedded line breaks are filler.
str9 <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Delete every lowercase letter, digit and newline; what remains is the message.
str_remove_all(string = str9, pattern = "([a-z])|([0-9])|\n")
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD!"