Week 3 Regular Expressions

Question #3

# Bringing in the data
library(stringr)
library(XML)
library(RCurl)
library(tau)
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"

# Pull out every run of two or more letters, periods, commas or spaces.
# raw.data is a single string, so the matches are the first (and only)
# element of the list returned by str_extract_all().
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
name

## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

Setting the sub-strings

ok_names are names that are already in the right order
re_names are names that are reversed and may or may not have a middle name
fo_names are names that are forward with a title

# Split the names by layout; the positions refer to the order of `name`.
ok_names <- name[c(1, 4)] # already in "First Last" order
re_names <- name[c(2, 5)] # reversed names
fo_names <- name[c(3, 6)] # forward names with titles

Extracting the first and last names from reversed names without a title

#first name as last word in the string

 #' Trailing word of a name (the first name when the name is reversed).
 #'
 #' @param x Character vector of names.
 #' @param i Index (or indices) of the element(s) of `x` to inspect.
 #' @return The run of word characters that ends `x[i]`, or `NA` if the
 #'   string does not end in a word character.
 r_first <- function(x, i) {
   entry <- x[i]
   str_extract(entry, "\\w+$")
 }
 #' Leading word of a name (the last name when the name is reversed).
 #'
 #' @param x Character vector of names.
 #' @param i Index (or indices) of the element(s) of `x` to inspect.
 #' @return The run of word characters that starts `x[i]`, or `NA` if the
 #'   string does not start with a word character.
 r_last <- function(x, i) {
   entry <- x[i]
   str_extract(entry, "^\\w+")
 }



 #' Rebuild "First Last" strings from reversed names.
 #'
 #' @param x Character vector of names whose trailing word is the first name
 #'   and whose leading word is the last name (e.g. "Simpson, Homer").
 #' @return Character vector the same length as `x`; each element is the
 #'   result of `r_first()` and `r_last()` joined by a single space. Empty
 #'   input gives `character(0)`.
 reverse_name_engine <- function(x) {
   # seq_along() gives an empty index for empty input, whereas 1:length(x)
   # would iterate over c(1, 0) and emit a spurious "NA NA" entry.
   idx <- seq_along(x)
   # r_first()/r_last() subset with x[i], so they accept the whole index
   # vector at once and no element-by-element loop is needed.
   paste(r_first(x, idx), r_last(x, idx))
 }

Functions extracting first and last names of forward names with titles

# First & last name after a title
#' Earliest letters-then-period token in a name (e.g. "Rev.").
#'
#' @param x Character vector of names.
#' @param i Index (or indices) of the element(s) of `x` to inspect.
#' @return The first match of one or more letters followed by a period in
#'   `x[i]`, or `NA` when there is none.
title <- function(x, i) {
  entry <- x[i]
  str_extract(entry, "[[:alpha:]]+?\\.")
}
#' First word of a name that is followed by whitespace.
#'
#' For a titled name such as "Rev. Timothy Lovejoy" this is the given name,
#' because the title is followed by a period rather than by whitespace.
#'
#' @param x Character vector of names.
#' @param i Index (or indices) of the element(s) of `x` to inspect.
#' @return The first run of word characters in `x[i]` that is immediately
#'   followed by whitespace, without that whitespace; `NA` if there is none.
first <- function(x, i) {
  # The lookahead requires the whitespace without capturing it. The earlier
  # pattern "\\w+\\s" returned e.g. "Timothy " with a trailing space, which
  # turned into a double space once pasted in front of the last name.
  str_extract(x[i], "\\w+(?=\\s)")
}
#' Trailing word of a name (the last name when the name is in forward order).
#'
#' @param x Character vector of names.
#' @param i Index (or indices) of the element(s) of `x` to inspect.
#' @return The run of word characters that ends `x[i]`, or `NA` if the
#'   string does not end in a word character.
last <- function(x, i) {
  entry <- x[i]
  str_extract(entry, "\\w+$")
}
 
 #' Build "First Last" strings from forward-order names that carry a title.
 #'
 #' @param x Character vector of names such as "Dr. Julius Hibbert".
 #' @return Character vector the same length as `x`; each element is the
 #'   result of `first()` and `last()` joined by a single space. Empty input
 #'   gives `character(0)`.
 forward_name_engine <- function(x) {
   # seq_along() gives an empty index for empty input, whereas 1:length(x)
   # would iterate over c(1, 0) and emit a spurious "NA NA" entry.
   idx <- seq_along(x)
   # paste() already inserts the separating space, so strip any whitespace
   # that came back with the first name to avoid a double space.
   given <- str_trim(first(x, idx))
   paste(given, last(x, idx))
 }

Part A - Running the functions and building new strings

# Put the reversed and the titled names into "First Last" order.
rev_names <- reverse_name_engine(re_names)
for_names <- forward_name_engine(fo_names)
# Concatenate the three groups and, in the same step, reorder them so the
# result agrees with the title and middle-name vectors.
three_one <- c(ok_names, rev_names, for_names)[c(1, 3, 5, 2, 4, 6)]

Part B

#' Title token (two or more letters followed by a period) in a name.
#'
#' @param x Character vector of names.
#' @param i Index (or indices) of the element(s) of `x` to inspect. Defaults
#'   to every element, so `title_c(x)` no longer depends on how `[` treats a
#'   missing index argument.
#' @return The first match in each `x[i]`, or `NA` when there is none.
title_c <- function(x, i = seq_along(x)) {
  # Equivalent to the earlier lookahead-plus-punctuation pattern: the
  # lookahead already forced the punctuation character to be a period.
  str_extract(x[i], "[[:alpha:]]{2,}\\.")
}

# TRUE where a name carries a title such as "Rev." or "Dr.".
title_check <- !is.na(title_c(name))
title_check

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Part C

#Three Three

#' Initial (a single upper-case letter followed by a period) in a name.
#'
#' @param x Character vector of names.
#' @param i Index (or indices) of the element(s) of `x` to inspect. Defaults
#'   to every element, so `middle(x)` no longer depends on how `[` treats a
#'   missing index argument.
#' @return The first match in each `x[i]`, or `NA` when there is none.
middle <- function(x, i = seq_along(x)) {
  # The earlier class "[[:upper:]{1}]" had the quantifier inside the
  # brackets, so "{", "1" and "}" were accepted as if they were letters.
  # \\b makes sure the letter stands alone and is not the tail of a word.
  str_extract(x[i], "\\b[[:upper:]]\\.")
}

# TRUE where a name contains an initial such as "C.".
second_check <- !is.na(middle(name))
second_check

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

 # Build the columns directly instead of going through cbind(): cbind()
 # first makes a character matrix, which turns the logical flags into the
 # strings "TRUE"/"FALSE". The column names are unchanged.
 name_table <- data.frame(
   First_Last = three_one,
   Preceeding_Titles = title_check,
   Middle_Names = second_check,
   stringsAsFactors = FALSE
 )

New table

First_Last	Preceeding_Titles	Middle_Names
Moe Szyslak	FALSE	FALSE
Montgomery Burns	FALSE	TRUE
Timothy Lovejoy	TRUE	FALSE
Ned Flanders	FALSE	FALSE
Homer Simpson	FALSE	FALSE
Julius Hibbert	TRUE	FALSE

Week 3 Regular Expressions

Bethany Poulin

September 16, 2017

Question #3

Setting the sub-strings

Extracting the first and last names from reversed names without a title

Functions extracting first and last names of forward names with titles

Part A - Running the functions and building new strings

Part B

Part C

New table

Question 4

Question 9