library("stringr")
# Sample names in mixed "First Last" / "Last, First" order; some carry a title
# ("Rev.", "Dr.") or an abbreviated first name ("C.").
name <- c(
  "Moe Szyslak",
  "Burns, C. Montogomery",
  "Rev. Timothy Lovejoy",
  "Ned Flanders",
  "Simpson, Homer",
  "Dr. Julius Hibbert"
)

# Pass 1 grabs each surname together with the delimiter that identifies it:
# either a word followed by a comma ("Burns,") or a non-period character, a
# blank and a word of three or more letters ("e Szyslak").
last_name <- str_extract_all(
  string = name,
  pattern = "[[:alpha:]]{2,},|[^[\\.]][[:blank:]][[:alpha:]]{3,}"
)
last_name <- unlist(last_name)

# Pass 2 keeps only the run of two or more letters, dropping the delimiter.
last_name <- str_extract_all(string = last_name, pattern = "[[:alpha:]]{2,}")
last_name <- unlist(last_name)
last_name
## [1] "Szyslak" "Burns" "Lovejoy" "Flanders" "Simpson" "Hibbert"
# Pass 1 pulls each first name together with the delimiter that identifies it:
#   ", Homer"         - a word after a comma ("Last, First" order)
#   "C. Montogomery"  - an initial, a literal period, a blank and a word
#   "Moe "            - a word of three or more letters followed by a blank
# The period after the initial is escaped (\\.) so it matches only a literal
# "."; left unescaped it matches any character, so a string such as "Al Bundy"
# would satisfy the "initial" alternative.
first_name <- unlist(str_extract_all(
  name,
  ", [[:alpha:]]{2,}|[[:upper:]]{1}\\. [[:alpha:]]{2,}|[[:alpha:]]{3,}[[:blank:]]"
))

# Pass 2 strips the delimiters: keep a plain word of two or more letters, or
# the whole "initial + period + word" form when the match starts with an
# initial (a single letter cannot satisfy the first alternative).
first_name <- unlist(str_extract_all(
  first_name,
  "[[:alpha:]]{2,}|[[:upper:]]{1}\\. [[:alpha:]]{2,}"
))
first_name
## [1] "Moe" "C. Montogomery" "Timothy" "Ned"
## [5] "Homer" "Julius"
# Combine the two extracted vectors column-wise; rows pair up by position.
name_table <- data.frame(first_name = first_name, last_name = last_name)
name_table
## first_name last_name
## 1 Moe Szyslak
## 2 C. Montogomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
# Flag names that carry a title: a capital letter, at least one further letter
# and a literal period ("Rev.", "Dr."). A bare initial such as "C." has no
# second letter before the period, so it is not counted as a title.
title <- str_detect(
  string = name,
  pattern = "[[:upper:]]{1}[[:alpha:]]{1,}\\."
)
name_table <- cbind(name_table, title = title)
name_table
## first_name last_name title
## 1 Moe Szyslak FALSE
## 2 C. Montogomery Burns FALSE
## 3 Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Homer Simpson FALSE
## 6 Julius Hibbert TRUE
# Flag entries whose extracted first name is an initial plus a second name:
# a capital letter, a literal period, a space and at least one letter
# ("C. Montogomery").
two_names <- str_detect(
  string = first_name,
  pattern = "[[:upper:]]{1}\\. [[:alpha:]]{1,}"
)
name_table <- cbind(name_table, two_names = two_names)
name_table
## first_name last_name title two_names
## 1 Moe Szyslak FALSE FALSE
## 2 C. Montogomery Burns FALSE TRUE
## 3 Timothy Lovejoy TRUE FALSE
## 4 Ned Flanders FALSE FALSE
## 5 Homer Simpson FALSE FALSE
## 6 Julius Hibbert TRUE FALSE
Describe the types of strings that conform to the following regular expressions and construct an example that is matched by each regular expression.
[0-9]+\$
one or more digits (each between 0 and 9) followed by a literal dollar sign ($)
example: 45$
# Example input: two digits followed by a dollar sign.
a <- "45$"
# The dollar sign is escaped so it is matched literally rather than as an
# end-of-string anchor.
str_extract_all(string = a, pattern = "[0-9]+\\$")
## [[1]]
## [1] "45$"
\b[a-z]{1,4}\b
a whole word made up of one to four lower case letters: \b marks a word boundary on each side, so the letters cannot be part of a longer word
example: yes
# Example input: a single three-letter lower case word.
b <- "yes"
# Word boundaries on both sides restrict the match to whole words of one to
# four lower case letters.
str_extract_all(string = b, pattern = "\\b[a-z]{1,4}\\b")
## [[1]]
## [1] "yes"
.*?\.txt$
any string that ends in .txt: .*? lazily matches any characters (zero or more, as few as possible), \. matches a literal period, and txt$ requires txt at the very end of the string
example: file.txt
# Example input: a file name with a .txt extension.
# NOTE: this variable shares its name with base R's c() function; calls to
# c(...) still resolve to the function.
c <- "file.txt"
# Lazy "any characters", then a literal period and "txt" anchored at the end
# of the string.
str_extract_all(string = c, pattern = ".*?\\.txt$")
## [[1]]
## [1] "file.txt"
\d{2}/\d{2}/\d{4}
a two digit number followed by / followed by a 2 digit number followed by / followed by a 4 digit number … a date
example: 11/12/1985
# Example input: a date written as two digits / two digits / four digits.
d <- "11/12/1985"
str_extract_all(string = d, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [[1]]
## [1] "11/12/1985"
<(.+?)>.+?</\1>
a string that starts with an opening tag (< followed by one or more characters and then >), which is followed by at least one character of content, which is followed by the matching closing tag: </ then a repeat of the text captured inside the opening tag (the backreference \1) and then > … a simple HTML/XML element
example: <h1>heading</h1>
e <- "<h1>heading</h1>"
# The backreference \\1 repeats whatever the first group captured inside the
# opening tag, so the closing tag has to carry the same name.
str_extract_all(string = e, pattern = "<(.+?)>.+?</\\1>")
## [[1]]
## [1] "<h1>heading</h1>"