Question 3

#needed library
library(stringr)

# Raw phone-book string (names and phone numbers run together, no separators)
raw.data <-"555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Pull out the names: every run of two or more letters, periods, commas or
# spaces. raw.data is a single string, so the first (only) list element holds
# all of the matches.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]

3.1

Rearrange the vector so that all elements conform to the standard first_name last_name

# Pick out the entries written as "last_name, first_name".
has_comma <- str_detect(name, ",")
names_with_comma <- name[has_comma]

# Splitting on ", " and flattening yields pieces that alternate
# last name, first name, last name, first name, ...
comma_less <- unlist(strsplit(names_with_comma, ", "))
last_name_slot <- c(TRUE, FALSE)  # recycled along comma_less
last_names <- comma_less[last_name_slot]
first_names <- comma_less[!last_name_slot]
new_names <- paste(first_names, last_names)

# Entries without a comma are already in standard form: keep them first and
# append the reordered names after them.
already_std <- name[!(name %in% names_with_comma)]
std_names <- c(already_std, new_names)
std_names

## [1] "Moe Szyslak"          "Rev. Timothy Lovejoy" "Ned Flanders"        
## [4] "Dr. Julius Hibbert"   "C. Montgomery Burns"  "Homer Simpson"

3.2

Construct a logical vector indicating whether a character has a title

# Logical vector, one element per entry of std_names: TRUE when the name
# contains a title. Titles normally have more than one character before the
# period (e.g. "Rev.", "Dr."), so a single letter followed by a period
# (an initial such as "C.") is not counted.
has_title <- str_detect(std_names, "[:alpha:]{2,}\\.")
has_title

## [1] FALSE  TRUE FALSE  TRUE FALSE FALSE

# The names flagged above, for reference
with_title <- std_names[has_title]
with_title

## [1] "Rev. Timothy Lovejoy" "Dr. Julius Hibbert"

3.3

Construct a logical vector indicating whether a character has a second name

# Logical vector, one element per entry of std_names: TRUE when the name
# contains a second name. Assumption: a second name shows up as a capital
# letter followed by a period (an initial), then a name, then a last name.
has_2nd_name <- str_detect(std_names, "[A-Z]\\.[:blank:]\\w{1,}[:blank:]\\w{1,}")
has_2nd_name

## [1] FALSE FALSE FALSE FALSE  TRUE FALSE

# The names flagged above, for reference
with_2nd_name <- std_names[has_2nd_name]
with_2nd_name

## [1] "C. Montgomery Burns"

Question 4

4.1

[0-9]+\$

This would return any part of a string that contains one or more digits followed by a “$”.

# Inputs: digits right before "$" (first two), "$" before digits, no digits
test_strings1 <- c(
  "helloworld1111$",
  "m1n3cr4f7$",
  "$1337",
  "LUL"
)
# One list element of matches per input, flattened into a single vector
matches1 <- str_extract_all(test_strings1, "[0-9]+\\$")
result1 <- unlist(matches1)
result1

## [1] "1111$" "7$"

4.2

\b[a-z]{1,4}\b

This would return a part of a string if it contains a word of at least one lowercase letter with a maximum of 4 lowercase letters. This word would only contain lowercase letters.

# Inputs: mixed case, letters around a digit, a short word, several words
test_strings2 <- c(
  "bRuh",
  "g2g",
  "lul",
  "lots of puppers"
)
# One list element of matches per input, flattened into a single vector
matches2 <- str_extract_all(test_strings2, "\\b[a-z]{1,4}\\b")
result2 <- unlist(matches2)
result2

## [1] "lul"  "lots" "of"

4.3

.*?\.txt$ — This would return any string that ends with “.txt”.

# Inputs: three strings ending in ".txt" and one with ".txt" in the middle
test_strings3 <- c(
  "issa file.txt",
  ".txt",
  "hw3.txt",
  "nota.txt test"
)
# One list element of matches per input, flattened into a single vector
matches3 <- str_extract_all(test_strings3, ".*?\\.txt$")
result3 <- unlist(matches3)
result3

## [1] "issa file.txt" ".txt"          "hw3.txt"

4.4

\d{2}/\d{2}/\d{4}

This will return part of the strings that have 2 sets of 2 digits followed by a set of 4 digits separated by slashes “/”

# Inputs: a bare date, a date with a one-digit first field, and dates
# embedded at the end and at the start of longer text
test_strings4 <- c(
  "09/15/2019",
  "2/20/1992",
  "yesterday was 09/14/2019",
  "23/03/1234 wow"
)
# One list element of matches per input, flattened into a single vector
matches4 <- str_extract_all(test_strings4, "\\d{2}/\\d{2}/\\d{4}")
result4 <- unlist(matches4)
result4

## [1] "09/15/2019" "09/14/2019" "23/03/1234"

4.5

<(.+?)>.+?</\1>

This would return the part of a string that starts with an opening tag, i.e. < and > with one or more characters between them, followed by one or more characters, and ends with a closing tag, i.e. </ and > enclosing the same text that was captured by the parentheses in the opening tag.

# Inputs: three with matching open/close tags (one preceded by extra text)
# and one whose closing tag differs from its opening tag
test_strings5 <- c(
  "<t>Kappa</t>",
  "<1asdf>1234asdf</1asdf>",
  "qwerty<tag>hihi</tag>",
  "<mis>oh</match>"
)
# One list element of matches per input, flattened into a single vector
matches5 <- str_extract_all(test_strings5, "<(.+?)>.+?</\\1>")
result5 <- unlist(matches5)
result5

## [1] "<t>Kappa</t>"            "<1asdf>1234asdf</1asdf>"
## [3] "<tag>hihi</tag>"

Question 9

secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Keep only the runs of capital letters and periods, in their original order.
# secret is a single string, so the first (only) list element holds them all.
kept_runs <- str_extract_all(secret, "[[:upper:].]{1,}")[[1]]

# Glue the runs into one string, then turn every period into a space.
joined <- paste(kept_runs, collapse = "")
msg <- str_replace_all(joined, "\\.", " ")
msg

## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"

ddeocampo-assignment-3

Diane DeOcampo

9/11/2019