library(stringr)
# Raw input names: a mix of "First Last", "Last, First", and titled forms.
name <- c(
  "Moe Szyslak",
  "Burns, C. Montgomery",
  "Rev. Timothy Lovejoy",
  "Ned Flanders",
  "Simpson, Homer",
  "Dr. Julius Hibbert"
)
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
3a. Reorder names that start with their last name.
Split each name at the comma (names without a comma stay as a single piece), trim the surrounding whitespace, and reverse the order of the pieces.
# Split each name at the comma, trim the pieces, and reverse their order.
# lapply() always returns a list; sapply() would collapse the result to a
# matrix whenever every name splits into the same number of pieces, and the
# reversal would then be applied to single cells instead of whole names.
splitname <- lapply(
  str_split(name, ","),
  function(parts) rev(str_trim(parts))
)
splitname
## [[1]]
## [1] "Moe Szyslak"
##
## [[2]]
## [1] "C. Montgomery" "Burns"
##
## [[3]]
## [1] "Rev. Timothy Lovejoy"
##
## [[4]]
## [1] "Ned Flanders"
##
## [[5]]
## [1] "Homer" "Simpson"
##
## [[6]]
## [1] "Dr. Julius Hibbert"
Then paste the pieces of each list element together with a single space in between - this only changes the elements that were split, i.e. the names that had their last name first. Finally, turn the list back into a character vector.
# Join the pieces of every element with a single space and return a plain
# character vector. vapply() guarantees one string per element and yields
# character(0) for empty input, whereas looping over 1:length(name) would
# run for i = 1 and i = 0 and produce a spurious "".
splitname <- vapply(
  splitname,
  paste,
  character(1),
  collapse = " ",
  USE.NAMES = FALSE
)
splitname
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
The code summed together:
# Step 1: split at the comma, trim, and reverse the pieces of each name.
# lapply() keeps the result a list even when every name contains a comma
# (sapply() would simplify that case to a matrix and break the reversal).
splitname <- lapply(
  str_split(name, ","),
  function(parts) rev(str_trim(parts))
)
# Step 2: join the pieces with a single space. vapply() is type-stable and
# returns character(0) for empty input instead of a spurious "".
splitname <- vapply(
  splitname,
  paste,
  character(1),
  collapse = " ",
  USE.NAMES = FALSE
)
splitname
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
3b. Identify whether each name in the vector has a title. I use the rule “if a name starts with two or more letters followed by a period, it has a title.”
# TRUE where the name begins with two or more letters followed by a period.
title <- str_detect(
  string = name,
  pattern = "^[[:alpha:]]{2,}\\."
)
title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
3c. We’ll remove the titles, and then count the number of spaces in each vector element. If the number of spaces is more than one, we conclude someone has more than one given name.
# Remove a leading title (two or more letters followed by a period) ...
untitled <- str_replace(splitname, "^[[:alpha:]]{2,}\\.", "")
# ... and the whitespace left behind by the removal.
untitled <- str_trim(untitled)
# Two or more spaces remaining means more than two name parts.
polynymous <- str_count(untitled, " ") >= 2
polynymous
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
Problem 7. Correct the expression for extracting an HTML tag
# First attempt: `.+` is greedy, so the match runs from the first "<" to the
# last ">" rather than stopping at the end of the opening tag.
breaking <- "<title>+++BREAKING NEWS+++</title>"
extract <- str_extract(string = breaking, pattern = "<.+>")
This does not return just the opening tag: “.+” is greedy, so the regular expression grabs every character from the first < up to the last >, i.e. the whole string. A corrected version follows:
breaking <- "<title>+++BREAKING NEWS+++</title>"
# Match only the opening tag at the start of the string. The POSIX class is
# written inside a bracket expression ([[:alnum:]]), matching the other
# patterns in this file; a bare [:alnum:] is not portable across regex
# engines.
str_extract(breaking, "^<[[:alnum:]]+>")
## [1] "<title>"
Problem 8. Extract a formula from a string
binomialstring <- "(5-3)^2=5^2-2*5*3+3^2 conforms to the binomial theorem"
# First attempt: the leading "^" negates the character class, so this
# returns the first run of characters that are NOT listed in the brackets.
str_extract(
  string = binomialstring,
  pattern = "[^0-9=+*()]+"
)
## [1] "-"
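This doesn’t work because a caret at the start of a character class negates the class, so the pattern matches the first run of characters that are not digits or the listed operators (here, the hyphen). To match the caret and the hyphen literally, escape each of them with a double backslash (i.e. \\) inside the class.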
# Escaped "^" and "-" are literal members of the class, so the whole formula
# is matched as one run of digits and operators.
str_extract(
  string = binomialstring,
  pattern = "[\\^\\-0-9=+*()]+"
)
## [1] "(5-3)^2=5^2-2*5*3+3^2"
Alternatively, since the formula contains no alphabetic characters and comes first in the string, we can simply extract the first run of non-alphabetic characters and trim any hanging spaces.
# Take the first run of non-alphabetic characters, then drop the trailing
# space picked up before the first word.
str_trim(
  string = str_extract(
    string = binomialstring,
    pattern = "[^[:alpha:]]+"
  )
)
## [1] "(5-3)^2=5^2-2*5*3+3^2"
# Input string for the next exercise (presumably an encoded message - confirm).
code <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Replace every literal "e" in the string with "*".
code <- str_replace_all(string = code, pattern = "e", replacement = "*")
# Candidate symbols as regex patterns: upper- and lower-case letters, the
# digits 0-9 as strings, and escaped "." and "!".
alphanum <- c(LETTERS, letters, as.character(0:9), "\\.", "\\!")
# NOTE(review): `charcount` is not defined in the visible code, so the
# substitution below stays disabled.
#str_replace_all(code, alphanum[which.max(charcount)], "e")