HW 3 DATA 607

library(stringr)

# Raw input text: a single string in which names and phone numbers run
# together with no delimiter between records.
raw.data <-" 555-1239Moe Szyslak( 636) 555-0113Burns, C. Montgomery555 -6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

Question 1

# Extract the names: every run of two or more letters, periods, commas, or
# spaces in the raw text. str_extract_all() returns a list, so flatten it
# into a plain character vector.
name <- unlist(
  str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")
)
name

## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

# Break each name at the comma: names written "Last, First" yield two pieces,
# every other name stays as a single piece.
split_name <- str_split(string = name, pattern = ",")
split_name

## [[1]]
## [1] "Moe Szyslak"
## 
## [[2]]
## [1] "Burns"          " C. Montgomery"
## 
## [[3]]
## [1] "Rev. Timothy Lovejoy"
## 
## [[4]]
## [1] "Ned Flanders"
## 
## [[5]]
## [1] "Simpson" " Homer" 
## 
## [[6]]
## [1] "Dr. Julius Hibbert"

# Rearrange every name into "first_name last_name" order.
#
# `split_name` holds one character vector per name: two pieces
# ("Last", " First") when the name was written "Last, First", otherwise a
# single piece. Reversing the pieces puts the first name in front and leaves
# single-piece names untouched; trimming removes the blank that followed the
# comma. The result is a plain character vector with one element per name.
split_name <- vapply(
  split_name,
  function(parts) str_c(str_trim(rev(parts)), collapse = " "),
  character(1)
)
split_name

## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"

Question 2: Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).

# TRUE for each name that contains a title: two or more letters directly
# followed by a period (e.g. "Rev." or "Dr.").
title <- str_detect(string = name, pattern = "[[:alpha:]]{2,}\\.")
title

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Construct a logical vector indicating whether a character has a second name.

# TRUE for each name that contains an abbreviated second name: a capital
# letter directly followed by a period (e.g. "C.").
secondname <- str_detect(string = name, pattern = "[A-Z]\\.{1}")
secondname

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

[0-9]+\\$ matches one or more digits (0-9) immediately followed by a literal dollar sign.

# Example: a run of digits followed by a literal dollar sign.
example <- "6729$"
regex <- "[0-9]+\\$"
str_extract(string = example, pattern = regex)

## [1] "6729$"

\\b[a-z]{1,4}\\b matches a whole word made up of one to four lowercase letters (a-z), with a word boundary on each side.

 # Example: both words are four lowercase letters long; str_extract() returns
 # only the first match.
 example <- "abcd efgh"
 regex <- "\\b[a-z]{1,4}\\b"
 str_extract(string = example, pattern = regex)

## [1] "abcd"

.*?\\.txt$ matches any string that ends in ".txt": any characters (matched lazily) followed by a literal ".txt" at the end of the string.

# Example: a file name ending in ".txt".
example <- "abcd.txt"
regex <- ".*?\\.txt$"
str_extract(string = example, pattern = regex)

## [1] "abcd.txt"

\\d{2}/\\d{2}/\\d{4} matches date-like strings: two digits, a slash, two digits, a slash, and four digits (for example, a two-digit month, two-digit day, and four-digit year separated by forward slashes).

# Example: the pattern is not anchored, so only the first four digits of the
# five-digit year are extracted.
example <- "01/17/19889"
regex <- "\\d{2}/\\d{2}/\\d{4}"
str_extract(string = example, pattern = regex)

## [1] "01/17/1988"

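<(.+?)>.+?</\\1> matches an HTML/XML element: an opening tag, at least one character of content, and a closing tag whose name is identical to the opening tag's name (enforced by the backreference \\1).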

# Example: <Title>...</head> is not matched because the closing tag name must
# equal the opening one; the <body> element is returned instead.
example <- "<Title>Sometext</head><body>Sometext</body>"
regex <- "<(.+?)>.+?</\\1>"
str_extract(string = example, pattern = regex)

## [1] "<body>Sometext</body>"

HW 3 DATA 607

Christina Kasman

9/15/2017