library(stringr)
## Warning: package 'stringr' was built under R version 3.5.1
# Unformatted phone-book text: phone numbers and names run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5543642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas, or spaces; the
# digits, hyphens, and parentheses of the phone numbers are not in the class,
# so they break the runs apart.
name <- str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")[[1]]
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Show only the entries that contain a comma.
name[str_detect(name, ",")]
## [1] "Burns, C. Montgomery" "Simpson, Homer"
# Reorders "Last, First" names into "First Last" order.
#
# Args:
#   name: character vector of names (a single string also works).
#
# Returns:
#   A character vector the same length as `name`. Elements of the form
#   "Last, First" come back as "First Last"; elements without a comma, and
#   NA values, are returned unchanged.
convert_names <- function(name) {
  # Capture the text before the first comma and everything after it, then swap
  # the two. `\\s*` tolerates a missing or repeated space after the comma, and
  # str_replace() is vectorised, so no scalar `if` (which errors on NA or on
  # input longer than one element) is needed.
  str_replace(name, "^([^,]+),\\s*(.+)$", "\\2 \\1")
}
# Run every entry through convert_names(), one element per call, and collect
# the single-string results back into `name`.
name <- vapply(name, convert_names, character(1), USE.NAMES = FALSE)
name
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
# 3(b) Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
# Use str_detect to flag names that carry a title.
# The periods in "Dr." and "Rev." are escaped so they match a literal "."; an
# unescaped "." matches any character, so "Dr." would also hit e.g. "Drake".
# The \\b word boundaries keep titles from matching inside longer words.
logical_name <- str_detect(
  name,
  "\\b(?:Dr\\.|Rev\\.|(?:Mayor|Father|Reverend|Doctor)\\b)"
)
logical_name
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# 3(c) Construct a logical vector indicating whether a character has a second name.
# Drop the first short token that ends in punctuation (one to three letters
# followed by a punctuation mark, e.g. "Dr." or "Rev."), then trim the
# whitespace left behind.
# NOTE(review): this also strips an initial such as "C." - confirm intended.
name2 <- str_replace(name, "([[:alpha:]]{1,3}[[:punct:]])", "")
name2 <- str_trim(name2)
# A name is flagged TRUE when exactly two words remain after the stripping.
name3 <- str_count(name2, "\\w+") == 2
name3
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
text <- "At least 20 currencies express money with a dollar sign at the end like so, 500$."
# One or more digits immediately followed by a literal "$"; the bare "20" has
# no dollar sign after it and is skipped.
str_extract_all(string = text, pattern = "[0-9]+\\$")[[1]]
## [1] "500$"
text2 <- "The diretory on your computer to find the files is your home directory but be careful."
# Whole words made of one to four lower-case letters; "The" is skipped because
# of its capital letter.
str_extract_all(string = text2, pattern = "\\b[a-z]{1,4}\\b")[[1]]
## [1] "on" "your" "to" "find" "the" "is" "your" "home" "but" "be"
files <- "A Presentation.txt"
# Match only when the string ends in ".txt": the dot is escaped and "$" anchors
# the end of the string.
str_extract_all(string = files, pattern = ".*?\\.txt$")[[1]]
## [1] "A Presentation.txt"
date <- "Today's date is 09/16/1964."
# Two digits, a slash, two digits, a slash, four digits.
str_extract_all(string = date, pattern = "\\d{2}/\\d{2}/\\d{4}")[[1]]
## [1] "09/16/1964"
tags <- c('<bbbb>Bold Text</bbbb>', '<ul>underline</ul>', '<i>italics</i>', '<xml>xml tag</xml>')
# Match an opening tag, its content, and a closing tag with the same name; \\1
# refers back to the tag name captured by the first group.
# str_extract() already returns a character vector, one element per input.
str_extract(string = tags, pattern = "<(.+?)>.+?</\\1>")
## [1] "<bbbb>Bold Text</bbbb>" "<ul>underline</ul>"
## [3] "<i>italics</i>" "<xml>xml tag</xml>"