Data 607 HW3

First Name Last Name

library(stringr)

## Warning: package 'stringr' was built under R version 3.3.3

# Raw input: names and phone numbers run together in one unbroken string
raw.data <-"555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Pull out every run of two or more letters, periods, commas or spaces;
# digits, dashes and parentheses therefore act as separators between names
name <- str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")[[1]]
# name

# For "Last, First" entries, move the part before the comma to the end.
# The comma travels to the front with the second group and is removed below.
flip <- str_replace_all(
  string = name,
  pattern = "(.+)(,.+)",
  replacement = "\\2 \\1"
)
# flip

# Strip the leftover ", " and display the names in "First Last" order
first_last <- str_replace_all(string = flip, pattern = ", ", replacement = "")
first_last

## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"

Has a Title?

# Flag names that carry a title (Rev. or Dr.).
# The dots are escaped so they match a literal period; an unescaped "." matches
# any character, which would also flag names such as "Reva" or "Drew".
title <- str_detect(first_last, "Rev\\.|Dr\\.")
title

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

# Pair each name with its title flag and display the resulting table
has_title <- data.frame(first_last = first_last, title = title)
has_title

##             first_last title
## 1          Moe Szyslak FALSE
## 2  C. Montgomery Burns FALSE
## 3 Rev. Timothy Lovejoy  TRUE
## 4         Ned Flanders FALSE
## 5        Homer Simpson FALSE
## 6   Dr. Julius Hibbert  TRUE

Has Second Name?

# An initial such as "C." (one capital letter directly followed by a period)
# is taken as the sign of a second name
second_name <- str_detect(string = first_last, pattern = "[A-Z]\\.")
second_name

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

# Pair each name with its second-name flag and display the resulting table
has_second_name <- data.frame(
  first_last = first_last,
  second_name = second_name
)
has_second_name

##             first_last second_name
## 1          Moe Szyslak       FALSE
## 2  C. Montgomery Burns        TRUE
## 3 Rev. Timothy Lovejoy       FALSE
## 4         Ned Flanders       FALSE
## 5        Homer Simpson       FALSE
## 6   Dr. Julius Hibbert       FALSE

**Regex Examples**

# "[0-9]+\\$" matches one or more digits immediately followed by a literal $
str_extract_all(
  string = "This will show 500$ but not $500",
  pattern = "[0-9]+\\$"
)

## [[1]]
## [1] "500$"

# "\\b[a-z]{1,4}\\b" matches whole words made of 1 to 4 lowercase letters
str_extract_all(
  string = "this will show THIS WILL NOT SHOW",
  pattern = "\\b[a-z]{1,4}\\b"
)

## [[1]]
## [1] "this" "will" "show"

# ".*?\\.txt$" matches text up to and including a ".txt" that sits at the very
# end of the string (the $ anchor); a string not ending in ".txt" gives no match
str_extract_all(
  string = "Show all the text before .txt",
  pattern = ".*?\\.txt$"
)

## [[1]]
## [1] "Show all the text before .txt"

# "\\d{2}/\\d{2}/\\d{4}" matches two digits, a slash, two more digits, a slash,
# and then four digits (e.g. a dd/mm/yyyy or mm/dd/yyyy style date)
str_extract_all(
  string = "2017/09/01 09/01/2017 09-01-2017",
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)

## [[1]]
## [1] "09/01/2017"

# "<(.+?)>.+?</\\1>" matches correctly tagged HTML: the backreference \\1
# requires the closing tag name to equal the captured opening tag name
str_extract_all(
  string = "<tag>This will show corrected tagged HTML text</tag>  <tag>Will not show <tag>",
  pattern = "<(.+?)>.+?</\\1>"
)

## [[1]]
## [1] "<tag>This will show corrected tagged HTML text</tag>"

Hidden Message

# Scrambled text; the message is decoded from it in the steps below
hidden <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
#hidden

# Keep only the uppercase letters plus the "." and "!" punctuation marks,
# one character per element
reveal <- str_extract_all(string = hidden, pattern = "[[:upper:].!]")[[1]]
reveal

##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"

# Swap every "." for a space (the dot is escaped so it is matched literally)
reveal2 <- str_replace_all(string = reveal, pattern = "\\.", replacement = " ")
reveal2

##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" " " "Y"
## [18] "O" "U" " " "A" "R" "E" " " "A" " " "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"

# Join the individual characters into one sentence
paste(reveal2, collapse = "")

## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"

Data 607 HW3

David Quarshie

September 15, 2017