Question 3:

library(stringr)

Copy the introductory example. The vector `names` stores the extracted names: "Moe Szyslak", "Burns, C. Montgomery", "Rev. Timothy Lovejoy", "Ned Flanders", "Simpson, Homer", "Dr. Julius Hibbert".

# Raw phone-book text from the introductory example: names and phone numbers
# are run together without any delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of at least two letters, periods, commas, or spaces.
# raw.data is a single string, so the first (and only) list element returned
# by str_extract_all() holds every match.
names <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
print(names)
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"
  1. Use the tools of this chapter to rearrange the vector so that all elements conform to the standard first_name last_name.
# Drop titles ("Rev. ", "Dr. ") and single-letter initials ("C. ").
# The period is escaped so it matches a literal "." rather than any character.
names2 <- str_replace_all(names, "\\b(Rev|Dr|[[:upper:]])\\. ", "")

# Reorder "last_name, first_name" entries to "first_name last_name".
# Entries without a comma do not match the pattern and are left untouched, so
# no element positions are hard-coded, names2 stays a character vector, and
# the reordered names carry no stray leading space.
names2 <- str_replace(names2, "^([^,]+),\\s*(.+)$", "\\2 \\1")

print(names2)
## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders"     "Homer Simpson"    "Julius Hibbert"
  2. Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# Flag each title separately. The period is escaped so that only a literal
# "Dr. " / "Rev. " counts as a title (an unescaped "." matches any character).
dr_title <- str_detect(names, "Dr\\. ")
rev_title <- str_detect(names, "Rev\\. ")
# The single logical vector the exercise asks for: TRUE when either title occurs.
has_title <- dr_title | rev_title
title_df <- data.frame(names, dr_title, rev_title, has_title)
title_df
##                  names dr_title rev_title has_title
## 1          Moe Szyslak    FALSE     FALSE     FALSE
## 2 Burns, C. Montgomery    FALSE     FALSE     FALSE
## 3 Rev. Timothy Lovejoy    FALSE      TRUE      TRUE
## 4         Ned Flanders    FALSE     FALSE     FALSE
## 5       Simpson, Homer    FALSE     FALSE     FALSE
## 6   Dr. Julius Hibbert     TRUE     FALSE      TRUE
  3. Construct a logical vector indicating whether a character has a second name (i.e., a middle name or initial).
# In this data a second name appears as a single capital-letter initial
# followed by a period (e.g. "C. "). Matching any such initial, rather than
# the literal letter C, avoids hard-coding one character; escaping the period
# stops ordinary words such as "Cy " from matching.
middle_name <- str_detect(names, "\\b[[:upper:]]\\. ")
midle_name_df <- data.frame(names, middle_name)
midle_name_df
##                  names middle_name
## 1          Moe Szyslak       FALSE
## 2 Burns, C. Montgomery        TRUE
## 3 Rev. Timothy Lovejoy       FALSE
## 4         Ned Flanders       FALSE
## 5       Simpson, Homer       FALSE
## 6   Dr. Julius Hibbert       FALSE

Question 4:

Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

# expression_1 matches a run of one or more digits immediately followed by a
# literal dollar sign (the "$" is escaped so it is not read as end-of-string).
expression_1 <- "[0-9]+\\$"

test_1 <- c(
  "5627$",
  "65666$",
  "2$"
)
str_detect(string = test_1, pattern = expression_1)
## [1] TRUE TRUE TRUE
# expression_2 matches a standalone word of one to four lowercase letters,
# delimited by a word boundary on each side.
expression_2 <- "\\b[a-z]{1,4}\\b"
test_2 <- c(
  "Meaghan",
  "test",
  "regx",
  "questions"
)
str_detect(string = test_2, pattern = expression_2)
## [1] FALSE  TRUE  TRUE FALSE
# expression_3 matches any string that ends in a literal ".txt"; the text in
# front of the extension may be anything, including nothing at all.
expression_3 <- ".*?\\.txt$"
test_3 <- c(
  "file.txt",
  "file.pdf",
  "file.exe",
  "homework.txt"
)

str_detect(string = test_3, pattern = expression_3)
## [1]  TRUE FALSE FALSE  TRUE
# expression_4 matches two digits, a slash, two digits, a slash, and four
# digits (xx/xx/xxxx), which is the usual shape of a written date.
expression_4 <- "\\d{2}/\\d{2}/\\d{4}"

test_4 <- c(
  "05/26/1993",
  "05/10/2015",
  "12/31/2017"
)
str_detect(string = test_4, pattern = expression_4)
## [1] TRUE TRUE TRUE
# expression_5 matches a tag pair: an opening "<name>", at least one character
# of content, and a closing "</name>" whose name must repeat the opening one
# (enforced by the \\1 backreference to the first capture group).
expression_5 <- "<(.+?)>.+?</\\1>"
test_5 <- c(
  "<test1>test2</test1>",
  "<html>blue</html>"
)
str_detect(string = test_5, pattern = expression_5)
## [1] TRUE TRUE

Question 9:

The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others! The code snippet is also available in the materials at www.r-datacollection.com.

# The message is hidden in the uppercase letters. Periods separate the words
# and a "!" closes the sentence, so both punctuation marks are extracted along
# with the capitals; every other character is noise.
extra_credit <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

upper_case <- unlist(str_extract_all(extra_credit, "[[:upper:].!]+"))
upper_case
##  [1] "C"  "O"  "N"  "G"  "R"  "A"  "T"  "U"  "L"  "AT" "I"  "O"  "N"  "S" 
## [15] "."  "Y"  "O"  "U"  "."  "A"  "R"  "E"  "."  "A"  ".S" "U"  "P"  "E" 
## [29] "R"  "N"  "E"  "R"  "D"  "!"
length(upper_case)
## [1] 34
# Glue the pieces back together, then turn the period separators into spaces.
upper_case <- paste0(upper_case, collapse = "")
secret_message <- str_replace_all(upper_case, "[.]", " ")

secret_message
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"