Problem 3

1. Rearrange every name so that it reads first_name last_name

library(stringr)
# Phone-book text in which the names and phone numbers run together
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas or spaces.
# raw.data is a single string, so all matches sit in the first list element.
name_pattern <- "[[:alpha:]., ]{2,}"
name <- str_extract_all(raw.data, name_pattern)[[1]]
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
library(stringr)
# Put names into "first last" order.
#
# A name written as "Last, First" is split at its first comma and the two
# parts are swapped; a name without a comma is returned untouched.
#
# input:   character vector of names (a single string works as before).
# returns: character vector of the same length as `input`; NA elements are
#          passed through unchanged.
name_it <- function(input) {
  # grepl() is vectorised and yields FALSE for NA, so a whole vector
  # (including missing values) can be handled without a scalar `if`
  has_comma <- grepl(",", input, fixed = TRUE)
  with_comma <- input[has_comma]

  # Split at the FIRST comma only, so text after a second comma stays with
  # the first-name part instead of being silently dropped
  last_part <- trimws(sub(",.*$", "", with_comma))
  first_part <- trimws(sub("^[^,]*,", "", with_comma))

  # The outer trimws() covers an empty first-name part, e.g. "Burns,"
  input[has_comma] <- trimws(paste(first_part, last_part))
  input
}
# Normalise every extracted name. vapply() guarantees a character vector
# (even when `name` is empty), and USE.NAMES = FALSE keeps the input strings
# from being attached as element names, so no as.character() clean-up is
# needed afterwards.
good_names <- vapply(name, name_it, character(1), USE.NAMES = FALSE)
good_names
## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"

2. Return a logical vector indicating whether each character has a title (e.g. Rev. or Dr.)

# A title is taken to be two or more letters immediately followed by a
# period; a single-letter initial such as "C." does not qualify
title_pattern <- "[[:alpha:]]{2,}\\."
grepl(pattern = title_pattern, x = name)
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

3. Does the character have a middle name?

# A character has a second (middle) name when three or more name parts remain
# once a leading title such as "Rev." or "Dr." is dropped. Counting parts
# handles both "Last, First Middle" and "First Middle Last"; a pattern that
# requires a comma only recognises the former.
# The gsub() blanks out a leading title and every comma, so only the name
# parts themselves are left to be counted.
name_parts <- trimws(gsub("^[[:alpha:]]{2,}\\.|,", " ", name))
lengths(strsplit(name_parts, "[[:space:]]+")) >= 3
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

Problem 4

Describe the regular expression and provide an example

1. One or more digits followed by a literal dollar sign, i.e. an amount written with a trailing `$`

# Sentence containing a run of digits that ends in a dollar sign
test <- "I have 773432$ in my pocket."
dollar_pattern <- "[0-9]+\\$"
str_extract(string = test, pattern = dollar_pattern)
## [1] "773432$"

2. A lowercase word of one to four letters

# "A" is upper case, so the first whole word the pattern accepts is "test"
test1 <- "A test"
short_word_pattern <- "\\b[a-z]{1,4}\\b"
str_extract(string = test1, pattern = short_word_pattern)
## [1] "test"

3. Any string that ends in `.txt`, such as the name of a text file

# The trailing `$` anchors ".txt" to the end of the string
test2 <- "mydata.txt"
txt_file_pattern <- ".*?\\.txt$"
str_extract(string = test2, pattern = txt_file_pattern)
## [1] "mydata.txt"

4. A date represented with slashes

# Two digits, two digits and four digits, separated by slashes
test3 <- "today is 12/12/2012"
slash_date_pattern <- "\\d{2}/\\d{2}/\\d{4}"
str_extract(string = test3, pattern = slash_date_pattern)
## [1] "12/12/2012"

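5. An HTML element: an opening tag, its content, and a closing tag with the same name (enforced by the back-reference `\\1`)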

# The captured tag name ("h1") must reappear in the closing tag via \\1
test4 <- "<h1>this is my header</h1>"
html_element_pattern <- "<(.+?)>.+?</\\1>"
str_extract(string = test4, pattern = html_element_pattern)
## [1] "<h1>this is my header</h1>"

Problem 9

long_string <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!"
# Find every run of upper-case letters and punctuation. The class is spelled
# [:upper:], the standard POSIX name, rather than relying on the regex engine
# to tolerate a capitalised [:Upper:].
message <- unlist(str_extract_all(long_string, "[[:upper:][:punct:]]+"))
# Glue the runs into one string, then turn every "." separator into a space
gsub(".", " ", paste(message, collapse = ""), fixed = TRUE)
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"