# Regex Question 3 - pull the names and phone numbers out of the raw string
library(stringr)

raw.data <- "555-1239Moe Szyslak (636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Juilius Hibbert"

# A name is a run of two or more letters, periods, commas or spaces. Because
# the space is part of the character class, a match can swallow the blank that
# separates it from the next phone number ("Moe Szyslak "), so the matches are
# trimmed before they are stored.
name <- str_trim(unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}")))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Juilius Hibbert"

# A phone number is an optional three-digit area code (optionally wrapped in
# parentheses) followed by a three-digit and a four-digit group, each of which
# may be preceded by a dash or a space.
phone <- unlist(
  str_extract_all(raw.data, "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}")
)
phone
## [1] "555-1239" "(636) 555-0113" "555-6542" "555 8904"
## [5] "636-555-3226" "5553642"

# Names and numbers appear in the same order in raw.data, so they pair up
# position by position.
data.frame(name = name, phone = phone)
## name phone
## 1 Moe Szyslak 555-1239
## 2 Burns, C. Montgomery (636) 555-0113
## 3 Rev. Timothy Lovejoy 555-6542
## 4 Ned Flanders 555 8904
## 5 Simpson, Homer 636-555-3226
## 6 Dr. Juilius Hibbert 5553642
# Regex Question 3a - rewrite every name in "first_name last_name" order
# Step 1: entries written as "Last, First" are flipped by capturing the text
# before and after the comma and swapping the two groups. Entries without a
# comma do not match the pattern and pass through untouched.
nameA <- str_replace_all(
  string = name,
  pattern = "^([^,]*), (.*)",
  replacement = "\\2 \\1"
)
nameA
## [1] "Moe Szyslak " "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Juilius Hibbert"
# Step 2: delete every word that ends in a period ("Rev.", "Dr.", "C.").
# The blank that followed the deleted word is left behind.
str_replace_all(string = nameA, pattern = "\\w+\\.", replacement = "")
## [1] "Moe Szyslak " " Montgomery Burns" " Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" " Juilius Hibbert"
# Step 3: same deletion, then strip the leftover leading/trailing blanks.
str_trim(
  str_replace_all(string = nameA, pattern = "\\w+\\.", replacement = "")
)
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Juilius Hibbert"
# Regex Question 3b - logical vector: does the name carry a title?
# A title is matched as two or more letters followed by a period ("Rev.",
# "Dr."). Requiring at least two letters keeps a one-letter initial such as
# "C." from being reported as a title.
# NOTE(review): assumes "title" means an honorific, not an initial - confirm
# against the exercise text.
str_detect(name, "[[:alpha:]]{2,}\\.")
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE
# Regex Question 3c - logical vector: does the name include a second name?
# Matching "word space word" is TRUE for every entry that has a first and a
# last name, so it cannot tell the names apart. Instead, titles (two or more
# letters followed by a period) are removed and the remaining
# whitespace-separated tokens are counted: three or more tokens means there is
# a name in addition to the first and last name.
# NOTE(review): assumes "second name" means a middle name or initial - confirm
# against the exercise text.
str_count(str_replace_all(nameA, "[[:alpha:]]{2,}\\.", ""), "\\S+") >= 3
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE
# Regex Question 7 - extract the first HTML tag from the string
tag <- "<title>+++BREAKING NEWS+++</title>"

# Incorrect attempt: `.+` is greedy, so it runs on to the LAST ">" in the
# string and the whole string is returned instead of the opening tag.
firsttag <- unlist(str_extract_all(tag, "<.+>"))
firsttag
## [1] "<title>+++BREAKING NEWS+++</title>"

# Two corrections that stop at the first ">" without hard-coding how many
# characters the tag name has. str_extract() returns only the first match,
# i.e. the opening tag.
# Option 1: make the quantifier lazy so it matches as few characters as it can.
firsttag <- str_extract(tag, "<.+?>")
firsttag
## [1] "<title>"
# Option 2: forbid ">" inside the tag, so the match cannot run past it.
firsttag2 <- str_extract(tag, "<[^>]+>")
firsttag2
## [1] "<title>"
# Regex Question 8
bitheoremstring <- "(5-3)^2=5^2-2*5*3+3^2"
bitheoremstring
## [1] "(5-3)^2=5^2-2*5*3+3^2"
# Incorrect attempt: a "^" at the start of a bracket expression negates the
# class, so this pattern matches runs of characters that are NOT a digit, "=",
# "+", "*", "(" or ")". Only the "-" and "^" characters are left to match.
bitheoremextract <- str_extract_all(bitheoremstring, "[^0-9=+*()]+")[[1]]
bitheoremextract
## [1] "-" "^" "^" "-" "^"
# "\\D" matches any single non-digit character, so this returns every symbol
# in the string one at a time: the arithmetic operators plus the brackets and
# the "=" sign.
bitheoremextract <- str_extract_all(bitheoremstring, "\\D")[[1]]
bitheoremextract
## [1] "(" "-" ")" "^" "=" "^" "-" "*" "*" "+" "^"
# Note that the `echo = FALSE` parameter was added to the code chunk to
# prevent printing of the R code that generated the plot.