# Bringing in the data
library(stringr)
library(XML)
library(RCurl)
library(tau)
# Unstructured phone-book text: phone numbers and names run together
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas, or spaces;
# the input is a single string, so the first list element holds every match
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
ok_names are names that are already in the right order
re_names are names that are reversed and may or may not have a middle name
fo_names are names that are in forward order but preceded by a title
# Sort the extracted names into three groups by position in `name`
ok_names <- name[c(1, 4)] # already in "First Last" order
re_names <- name[c(2, 5)] # reversed: "Last, First"
fo_names <- name[c(3, 6)] # forward order, preceded by a title
#first name as last word in the string
r_first <- function(x, i) {
  # In a reversed name the first name is the word that ends the string
  entry <- x[i]
  str_extract(entry, "\\w+$")
}
r_last <- function(x, i) {
  # In a reversed name the last name is the word that starts the string
  entry <- x[i]
  str_extract(entry, "^\\w+")
}
# Rebuild reversed names ("Last, First") as "First Last".
#
# x: character vector of reversed names.
# Returns a character vector with one element per element of x, made of the
# word that ends the input (r_first) followed by the word that starts it
# (r_last). An empty x gives an empty character vector.
reverse_name_engine <- function(x) {
  # seq_along() rather than 1:length(x): 1:0 counts down as c(1, 0), which
  # made an empty input come back as "NA NA"
  idx <- seq_along(x)
  # Both helpers accept a vector of indices, so no loop is needed
  paste(r_first(x, idx), r_last(x, idx))
}
# First & last name after a title
title <- function(x, i) {
  # First run of letters that is immediately followed by a period
  entry <- x[i]
  str_extract(entry, "[[:alpha:]]+?\\.")
}
# First word of x[i] that is followed by whitespace.
#
# x: character vector of names; i: index (or indices) of the elements to use.
# Returns the matched word for each selected element, or NA when there is none.
# The whitespace is only looked ahead for, not captured: capturing it left a
# trailing space on the result, so paste(first, last) produced two spaces.
first <- function(x, i) {
  str_extract(x[i], "\\w+(?=\\s)")
}
last <- function(x, i) {
  # The last name is the word that ends the string
  entry <- x[i]
  str_extract(entry, "\\w+$")
}
# Reduce titled names ("Title First Last") to first name plus last name.
#
# x: character vector of names in forward order that start with a title.
# Returns a character vector with one element per element of x, made of the
# result of first() and the result of last() joined by a space. An empty x
# gives an empty character vector.
forward_name_engine <- function(x) {
  # seq_along() rather than 1:length(x): 1:0 counts down as c(1, 0), which
  # made an empty input come back as "NA NA"
  idx <- seq_along(x)
  # Both helpers accept a vector of indices, so no loop is needed
  paste(first(x, idx), last(x, idx))
}
# Put the two groups of names that need reordering through their engines
rev_names <- reverse_name_engine(re_names)
for_names <- forward_name_engine(fo_names)
# Join the three groups, then pick the elements in the order c(1, 3, 5, 2, 4, 6)
# so the result lines up with the title and middle-name vectors
three_one <- c(ok_names, rev_names, for_names)[c(1, 3, 5, 2, 4, 6)]
# Extract a preceding title: two or more letters followed by a period.
#
# x: character vector of names.
# i: index (or indices) of the elements to check; defaults to every element,
#    so title_c(x) no longer depends on a missing `i` being passed into `[`.
# Returns the matched title (e.g. "Rev.") for each selected element, or NA.
title_c <- function(x, i = seq_along(x)) {
  # Same matches as the earlier lookahead-plus-[:punct:] pattern, written
  # without a POSIX class outside a bracket expression
  str_extract(x[i], "[[:alpha:]]{2,}\\.")
}
# TRUE for every name in which title_c() finds a preceding title
title_check <- !is.na(title_c(name))
title_check
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
#Three Three
# Extract a middle initial: one uppercase letter followed by a period.
#
# x: character vector of names.
# i: index (or indices) of the elements to check; defaults to every element,
#    so middle(x) no longer depends on a missing `i` being passed into `[`.
# Returns the matched initial (e.g. "C.") for each selected element, or NA.
middle <- function(x, i = seq_along(x)) {
  # The earlier class "[[:upper:]{1}]" also accepted the literal characters
  # "{", "1" and "}", so text such as "1." counted as a middle initial
  str_extract(x[i], "[[:upper:]]\\.")
}
# TRUE for every name in which middle() finds a middle initial
second_check <- !is.na(middle(name))
second_check
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# Build the table column by column. Going through cbind() first produced a
# character matrix, which turned the two logical flag columns into text.
# The column names are unchanged.
name_table <- data.frame(
  First_Last = three_one,
  Preceeding_Titles = title_check,
  Middle_Names = second_check,
  stringsAsFactors = FALSE
)
First_Last | Preceeding_Titles | Middle_Names |
---|---|---|
Moe Szyslak | FALSE | FALSE |
Montgomery Burns | FALSE | TRUE |
Timothy Lovejoy | TRUE | FALSE |
Ned Flanders | FALSE | FALSE |
Homer Simpson | FALSE | FALSE |
Julius Hibbert | TRUE | FALSE |
(a) [0-9]+\$ - This selects a string of numbers ending with a dollar sign
(b) \b[a-z]{1,4}\b - This selects a whole word made up of one to four lowercase letters (a to z, in any order); the \b on each side is a word boundary, so longer words are not matched
(c) .*?\.txt$ - This selects any amount of characters preceding and including “.txt” (it works back from “.txt”)
(d) \d{2}/\d{2}/\d{4} - This selects two digits followed by a “/” followed by two digits followed by “/” followed by four digits…it is basically a date extractor
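(e) <(.+?)>.+?</\1> - This selects an opening tag, at least one character of content, and the closing tag with the same name, e.g. “<b>text</b>”; the \1 is a backreference that repeats whatever the first group (the tag name) captured. It can appear to select nothing when tested in R if the backslash is not doubled ("\\1") inside the string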
# Scrambled text that hides a message
secret_message <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the uppercase letters and the periods
decode <- str_extract_all(secret_message, "[[:upper:].]")
# Join the kept characters into one string, then replace punctuation with spaces
answer <- str_replace_all(paste(decode[[1]], collapse = ""), "[[:punct:]]", " ")
answer
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"