# ---
# title: "Data607_Assignment3"
# author: "Violeta Stoyanova"
# date: "2/17/2018"
# output: html_document
# ---
library(stringr)
# 3. Reproduce the introductory example: pull the names out of the raw
#    phone-book string and store them in a data frame.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is any run of two or more letters, periods, commas or spaces.
# raw.data is a single string, so the first (only) list element returned by
# str_extract_all() already holds every match.
name <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]
name_df <- data.frame(name = name)
name_df
## name
## 1 Moe Szyslak
## 2 Burns, C. Montgomery
## 3 Rev. Timothy Lovejoy
## 4 Ned Flanders
## 5 Simpson, Homer
## 6 Dr. Julius Hibbert
# 3.1 Rearrange the vector so that every element follows the
#     "first_name last_name" convention.
# Entries written as "Last, First" are flipped by swapping the two capture
# groups around the comma; entries without a comma pass through untouched.
name_rear <- str_replace_all(
  string = name,
  pattern = "(.*), (.*)",
  replacement = "\\2 \\1"
)
name_rear
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
# Strip every token that ends in a period followed by a space, i.e. the
# titles ("Rev.", "Dr.") and the initial ("C.").
f_l_name <- str_replace_all(
  string = name_rear,
  pattern = "[\\w]+\\. ",
  replacement = ""
)
f_l_name
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
# 3.2 Logical vector flagging the names that carry a title.
# A title is taken to be two or three letters followed by a period and a
# space (e.g. "Dr. ", "Rev. "); a single-letter initial does not qualify.
title <- str_detect(
  string = name,
  pattern = "[[:alpha:]]{2,3}\\. "
)
title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# 3.3 Logical vector flagging the names that contain a second name.
# A second name is recognised by its abbreviated form: a single letter
# followed by a period and a space (e.g. "C. ").
# The word boundary (\\b) makes sure the letter stands on its own, so the
# last letter of a title such as "Dr." or "Rev." is not mistaken for an
# initial. Unlike a pattern anchored with ^, this also finds an initial
# that sits in the middle of the name (e.g. "Homer J. Simpson").
second_name <- str_detect(name_rear, "\\b[[:alpha:]]\\. ")
second_name
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# 4.1 "[0-9]+\\$": one or more digits immediately followed by a literal
#     dollar sign.
sample <- "This is 7645$"
str_extract_all(
  string = sample,
  pattern = "[0-9]+\\$"
)
## [[1]]
## [1] "7645$"
# 4.2 "\\b[a-z]{1,4}\\b": every whole word made up of one to four lowercase
#     letters. "This" is skipped because of its capital letter.
sample <- "This is sane pick"
str_extract_all(
  string = sample,
  pattern = "\\b[a-z]{1,4}\\b"
)
## [[1]]
## [1] "is" "sane" "pick"
# 4.3 ".*?\\.txt$": any sequence of characters that ends in ".txt" at the
#     end of the string.
sample <- "this is my file.txt"
str_extract_all(
  string = sample,
  pattern = ".*?\\.txt$"
)
## [[1]]
## [1] "this is my file.txt"
# 4.4 "\\d{2}/\\d{2}/\\d{4}": two digits, a slash, two digits, a slash and
#     four digits, which is the usual shape of a date such as a birthday.
sample <- "85928/08/1987943"
str_extract_all(
  string = sample,
  pattern = "\\d{2}/\\d{2}/\\d{4}"
)
## [[1]]
## [1] "28/08/1987"
# 4.5 "<(.+?)>.+?</\\1>": an opening HTML-style tag, the content that
#     follows it, and the closing tag whose name matches the opening one
#     (enforced by the back-reference \\1).
sample <- "this <p>an extraction </p>"
str_extract_all(
  string = sample,
  pattern = "<(.+?)>.+?</\\1>"
)
## [[1]]
## [1] "<p>an extraction </p>"
# 9. The following code hides a secret message. Crack it with R and regular
#    expressions. Hint: some of the characters are more revealing than
#    others! The code snippet is also available in the materials at
#    www.r-datacollection.com.
data <- 'clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr'
# The message is spelled out by the uppercase letters. The POSIX class is
# written in its bracketed form, [[:upper:]], to match the [[:alpha:]]
# usage in the rest of this script and to stay valid in base R regex too.
secret_messege <- str_extract_all(data, "[[:upper:]]")
secret_messege
## [[1]]
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"
# The periods and the exclamation mark in the data separate the words, so
# keeping them and collapsing the matches gives the readable message.
secret_text <- str_c(
  unlist(str_extract_all(data, "[[:upper:].!]")),
  collapse = ""
)
secret_text
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD!"