# Regex Week #4

# Regex Question 3 ----
# Split the raw phone-book string into a vector of names and a vector of
# phone numbers, then pair them up in a data frame.
raw.data <- "555-1239Moe Szyslak (636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Juilius Hibbert"
library(stringr)

# A name is a run of two or more letters, periods, commas, or spaces. Because
# spaces are part of the character class, a match can pick up the blank that
# separates it from the next phone number ("Moe Szyslak "), so the matches are
# trimmed before being stored.
name <- str_trim(unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}")))
name

## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Juilius Hibbert"

# A phone number is an optional three-digit area code (optionally wrapped in
# parentheses), then three digits and four digits, with an optional hyphen or
# space between the groups.
phone <- unlist(str_extract_all(raw.data, "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}"))
phone

## [1] "555-1239"       "(636) 555-0113" "555-6542"       "555 8904"      
## [5] "636-555-3226"   "5553642"

data.frame(name = name, phone = phone)

##                   name          phone
## 1          Moe Szyslak       555-1239
## 2 Burns, C. Montgomery (636) 555-0113
## 3 Rev. Timothy Lovejoy       555-6542
## 4         Ned Flanders       555 8904
## 5       Simpson, Homer   636-555-3226
## 6  Dr. Juilius Hibbert        5553642

# Regex Question 3a ----
# Rearrange every name into "first_name last_name" order.
# Step 1: names written as "Last, First" are flipped to "First Last"; names
# without a comma do not match the pattern and pass through unchanged.
nameA <- str_replace_all(name, "^([^,]*), (.*)", "\\2 \\1")
nameA

## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Juilius Hibbert"

# Step 2: drop every token that ends in a period (titles such as "Rev." and
# "Dr.", and the initial "C."). This leaves a leading blank where the token
# used to be.
str_replace_all(nameA, "\\w+\\.", "")

## [1] "Moe Szyslak"       " Montgomery Burns" " Timothy Lovejoy" 
## [4] "Ned Flanders"      "Homer Simpson"     " Juilius Hibbert"

# Step 3: trim the blank left behind by the removed token.
str_trim(str_replace_all(nameA, "\\w+\\.", ""))

## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy" 
## [4] "Ned Flanders"     "Homer Simpson"    "Juilius Hibbert"

# Regex Question 3b ----
# Flag the names that carry a title such as "Rev." or "Dr.".
# A title is taken to be two or more letters followed by a period. Requiring
# at least two letters keeps a one-letter initial ("C." in
# "Burns, C. Montgomery") from being reported as a title.
str_detect(name, "[[:alpha:]]{2,}\\.")

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

# Regex Question 3c ----
# Flag the names that have a second (middle) name or initial.
# NOTE(review): this reads 3c as "has a second name" - confirm against the
# exercise text. The previous pattern "\\w+\\s\\w+" is TRUE for any name of
# two or more words, so it returned TRUE for every entry.
# Titles (two or more letters followed by a period) are removed first so that
# "Rev." and "Dr." are not counted as names; an entry qualifies when more than
# two name words remain.
str_count(str_replace_all(nameA, "[[:alpha:]]{2,}\\.", ""), "\\w+") > 2

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

# Regex Question 7 ----
# Goal: extract the first HTML tag from the string.
tag <- "<title>+++BREAKING NEWS+++</title>"

# Incorrect attempt: ".+" is greedy, so it runs to the LAST ">" in the string
# and the whole string is returned instead of just the opening tag.
firsttag <- unlist(str_extract_all(tag, "<.+>"))
firsttag

## [1] "<title>+++BREAKING NEWS+++</title>"

# Below are two options for extracting the first tag. Neither depends on the
# tag name being exactly five characters long. str_extract() returns only the
# first match, so the closing tag is not included.

# Option 1: make the quantifier lazy so it stops at the first ">".
firsttag <- str_extract(tag, "<.+?>")
firsttag

## [1] "<title>"

# Option 2: match anything except ">" between the angle brackets.
firsttag2 <- str_extract(tag, "<[^>]+>")
firsttag2

## [1] "<title>"

# Regex Question 8 ----
bitheoremstring <- "(5-3)^2=5^2-2*5*3+3^2"

bitheoremstring

## [1] "(5-3)^2=5^2-2*5*3+3^2"

# Incorrect attempt: a "^" placed first inside [...] negates the class, so the
# pattern matches runs of characters that are NOT digits, "=", "+", "*", "(",
# or ")". In this string that leaves only "-" and "^".
bitheoremextract <- unlist(str_extract_all(bitheoremstring, "[^0-9=+*()]+"))
bitheoremextract

## [1] "-" "^" "^" "-" "^"

# This yields all of the operators within the string. The operators are listed
# explicitly ("-" and "^" escaped so they are taken literally) rather than
# using "\\D", which would also match letters, spaces, and any other non-digit.
bitheoremextract <- unlist(str_extract_all(bitheoremstring, "[\\-+*/\\^=()]"))
bitheoremextract

##  [1] "(" "-" ")" "^" "=" "^" "-" "*" "*" "+" "^"

# Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Regex Week #4
#
# Blandon Casenave
#
# February 21, 2016