library(stringr)
library(kableExtra)
library(knitr)
3.) Copy the introductory example. The vector name stores the extracted names.
Construct a logical vector indicating whether a character has a title (i.e., Rev. and Dr.).
Construct a logical vector indicating whether a character has a second name.
# Raw phone-book text: names are interleaved with phone numbers.
raw.data <-"555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# Extract the names: runs of two or more letters, periods, commas, or spaces.
originalNames <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
originalNames
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Remove middle initials (a single letter followed by a period, e.g. " C. ").
# [A-Za-z] is used instead of [A-z]: the range A-z also covers the punctuation
# characters that sit between "Z" and "a" in the character table.
names1 <- str_replace(originalNames, "\\s[A-Za-z]\\. ", " ")
names1
## [1] "Moe Szyslak" "Burns, Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Reorder "Last, First" into "First Last" by swapping the two captured words.
names2 <- str_replace(names1, "(\\w+),\\s(\\w+)", "\\2 \\1")
names2
## [1] "Moe Szyslak" "Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
# Strip a leading title (Rev., Dr.). The match includes the trailing space and
# is replaced with the empty string so no leading blank is left behind.
newNames <- str_replace(names2, "^[A-Za-z]{2,3}\\. ", "")
newNames
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
originalN <- data.frame(originalNames)
newN <- data.frame(newNames)
# Show the original and the cleaned names side by side in one kable table.
kable(list(originalN, newN), caption = "Names")
|
|
# Flag characters whose name starts with a title such as "Rev." or "Dr.":
# two or three letters, a period, then a space. [A-Za-z] replaces [A-z], which
# would also match the punctuation characters between "Z" and "a".
title <- str_detect(names2, "^[A-Za-z]{2,3}\\. ")
df1 <- data.frame(names2, title)
df1
## names2 title
## 1 Moe Szyslak FALSE
## 2 Montgomery Burns FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Homer Simpson FALSE
## 6 Dr. Julius Hibbert TRUE
# Flag characters with a second name, written as an initial: a single uppercase
# letter directly followed by a period (e.g. "C."). Titles do not match because
# the letter before their period is lowercase. The result gets its own name so
# it neither overwrites `title` nor mislabels the column in df2.
second_name <- str_detect(originalNames, "[A-Z]\\.")
df2 <- data.frame(originalNames, second_name)
df2
## originalNames second_name
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery TRUE
## 3 Rev. Timothy Lovejoy FALSE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert FALSE
4.) Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
a. [0-9]+\\$ b. \\b[a-z]{1,4}\\b c. .*?\\.txt$ d. \\d{2}/\\d{2}/\\d{4} e. <(.+?)>.+?</\\1>
# a. One or more digits immediately followed by a literal dollar sign.
ex_one <- "15689142$now!@23$"
str_extract_all(
  string = ex_one,
  pattern = "[0-9]+\\$"
)
## [[1]]
## [1] "15689142$" "23$"
# b. Whole words made of one to four lowercase letters.
ex_two <- " Today is a day great"
str_extract_all(
  string = ex_two,
  pattern = "\\b[a-z]{1,4}\\b"
)
## [[1]]
## [1] "is" "a" "day"
# c. Any string that ends in ".txt", such as a text-file path.
ex_three <- "c:/local/amanda/homework.txt"
str_extract_all(
  string = ex_three,
  pattern = ".*?\\.txt$"
)
## [[1]]
## [1] "c:/local/amanda/homework.txt"
# d. Two digits, a slash, two digits, a slash, four digits (a date-like string).
ex_four <- "07/02/1983"
str_extract_all(
  string = ex_four,
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
## [[1]]
## [1] "07/02/1983"
# e. An opening tag, some content, and a closing tag whose name repeats the
#    opening tag's name (via the \\1 backreference).
ex_five <- "<p>What kind of line is this, a paragraph</p>"
str_extract_all(
  string = ex_five,
  pattern = "<(.+?)>.+?</\\1>"
)
## [[1]]
## [1] "<p>What kind of line is this, a paragraph</p>"
data <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the uppercase letters and the punctuation that belongs to the
# hidden message: "." separates the words and "!" closes the sentence.
mystery <- unlist(str_extract_all(data, "[A-Z.!]"))
mystery
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
# Join the extracted characters into a single string with no separator.
mystery <- paste(mystery, collapse = "")
# Turn the "." word separators into spaces to reveal the message.
str_replace_all(mystery, "[.]", " ")
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"