library(stringr)
# Source text: names interleaved with digit sequences that look like phone
# numbers. The string is assembled from one piece per person so that every
# line stays short; the pieces are concatenated without any separator.
raw.data <- paste0(
  " 555-1239Moe Szyslak",
  "( 636) 555-0113Burns, C. Montgomery",
  "555 -6542Rev. Timothy Lovejoy",
  "555 8904Ned Flanders",
  "636-555-3226Simpson, Homer",
  "5553642Dr. Julius Hibbert"
)
Question 1
# Collect every run of two or more letters, periods, commas, or spaces from
# raw.data, then flatten the list of matches into a plain character vector.
name <- unlist(
  str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")
)
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Break each name at its comma: "Last, First" entries become two pieces, while
# names without a comma stay as a single piece.
split_name <- str_split(string = name, pattern = ",")
split_name
## [[1]]
## [1] "Moe Szyslak"
##
## [[2]]
## [1] "Burns" " C. Montgomery"
##
## [[3]]
## [1] "Rev. Timothy Lovejoy"
##
## [[4]]
## [1] "Ned Flanders"
##
## [[5]]
## [1] "Simpson" " Homer"
##
## [[6]]
## [1] "Dr. Julius Hibbert"
# Bind the per-name pieces into a data frame with one column per name.
# data.frame() recycles the single-piece names to two rows, so row 1 equals
# row 2 for every name that had no comma.
split_name <- data.frame(split_name)
split_name
## X.Moe.Szyslak. c..Burns.....C..Montgomery.. X.Rev..Timothy.Lovejoy.
## 1 Moe Szyslak Burns Rev. Timothy Lovejoy
## 2 Moe Szyslak C. Montgomery Rev. Timothy Lovejoy
## X.Ned.Flanders. c..Simpson.....Homer.. X.Dr..Julius.Hibbert.
## 1 Ned Flanders Simpson Dr. Julius Hibbert
## 2 Ned Flanders Homer Dr. Julius Hibbert
# Row 1 holds the piece before the comma (the last name for "Last, First"
# entries); row 2 holds the piece after it. Both rows are identical for names
# that contained no comma.
ln <- data.frame(split_name[1, ])
fn <- data.frame(split_name[2, ])
# Flatten each one-row data frame to a plain character vector. as.character()
# makes this work whether the columns are factors or strings, and trimws()
# drops the space that followed the comma.
last_part <- trimws(vapply(ln, as.character, character(1), USE.NAMES = FALSE))
first_part <- trimws(vapply(fn, as.character, character(1), USE.NAMES = FALSE))
# Comma-free names are kept unchanged; the others are emitted as a single
# "First Last" string.
split_name <- ifelse(
  first_part == last_part,
  last_part,
  paste(first_part, last_part)
)
split_name
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
Question 2: Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# Flag names that contain a title: two or more letters immediately followed by
# a period (e.g. "Rev.", "Dr."), which excludes single-letter initials.
title <- str_detect(string = name, pattern = "[[:alpha:]]{2,}\\.")
title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
Construct a logical vector indicating whether a character has a second name.
# Flag names that contain an abbreviated second name: an uppercase letter
# immediately followed by a period (e.g. "C.").
# NOTE(review): the pattern has no word boundary, so an abbreviation ending in
# an uppercase letter would also match -- confirm this is acceptable.
secondname <- str_detect(string = name, pattern = "[A-Z]\\.{1}")
secondname
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
[0-9]+\$ matches one or more digits (0-9) immediately followed by a literal dollar sign.
# One or more digits followed by a literal dollar sign.
regex <- "[0-9]+\\$"
example <- "6729$"
str_extract(string = example, pattern = regex)
## [1] "6729$"
\b[a-z]{1,4}\b matches a whole word made of at least 1 and at most 4 lowercase letters (a-z), with a word boundary at either edge of the word.
# A whole word of one to four lowercase letters; str_extract() returns only
# the first such word in the example.
regex <- "\\b[a-z]{1,4}\\b"
example <- "abcd efgh"
str_extract(string = example, pattern = regex)
## [1] "abcd"
.*?\.txt$ matches any string that ends in .txt (any characters, possibly none, followed by a literal ".txt" at the end of the string).
# Any characters (possibly none) followed by a literal ".txt" at the end of
# the string.
regex <- ".*?\\.txt$"
example <- "abcd.txt"
str_extract(string = example, pattern = regex)
## [1] "abcd.txt"
\d{2}/\d{2}/\d{4} matches three groups of digits (two, two, and four) separated by forward slashes, e.g. a date with a two-digit month, two-digit day, and four-digit year.
# Two digits, a slash, two digits, a slash, then four digits. The pattern is
# not anchored, so the example's extra trailing digit is simply left out of
# the extracted match.
regex <- "\\d{2}/\\d{2}/\\d{4}"
example <- "01/17/19889"
str_extract(string = example, pattern = regex)
## [1] "01/17/1988"
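<(.+?)>.+?</\1> matches an HTML-style element: an opening tag, at least one character of content, and a closing tag whose name repeats the opening tag's name (via the backreference \1).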
# An opening tag, some content, and a closing tag whose name must equal the
# captured opening-tag name (backreference \\1). In the example only the
# <body>...</body> pair satisfies this, because <Title> is closed by </head>.
regex <- "<(.+?)>.+?</\\1>"
example <- "<Title>Sometext</head><body>Sometext</body>"
str_extract(string = example, pattern = regex)
## [1] "<body>Sometext</body>"