# ---
# title: "Data607_Assignment3"
# author: "Violeta Stoyanova"
# date: "2/17/2018"
# output: html_document
# ---

library(stringr)
# Exercise 3: reproduce the introductory example. The raw string interleaves
# phone numbers with names; every run of two or more letters, periods, commas
# or spaces is pulled out into the character vector `name`, which is then
# wrapped in a one-column data frame and printed.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
name <- raw.data %>%
  str_extract_all(pattern = "[[:alpha:]., ]{2,}") %>%
  unlist()
name_df <- data.frame(name = name)
name_df

##                   name
## 1          Moe Szyslak
## 2 Burns, C. Montgomery
## 3 Rev. Timothy Lovejoy
## 4         Ned Flanders
## 5       Simpson, Homer
## 6   Dr. Julius Hibbert

# Exercise 3.1: bring every name into "first_name last_name" order. Entries
# written as "Last, First" have their two comma-separated parts swapped via
# back-references; entries that contain no ", " are returned unchanged.
name_rear <- str_replace_all(
  string = name,
  pattern = "(.*), (.*)",
  replacement = "\\2 \\1"
)
name_rear

## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"

# Strip titles and abbreviated middle names: every run of word characters
# that is immediately followed by a period and a space (e.g. "Rev. ", "C. ")
# is deleted from the rearranged names.
f_l_name <- name_rear %>%
  str_replace_all(pattern = "[\\w]+\\. ", replacement = "")
f_l_name

## [1] "Moe Szyslak"      "Montgomery Burns" "Timothy Lovejoy" 
## [4] "Ned Flanders"     "Homer Simpson"    "Julius Hibbert"

# Exercise 3.2: logical vector flagging the names that carry a title, detected
# as two or three letters immediately followed by a period and a space
# (such as "Dr. " or "Rev. ").
title <- str_detect(
  string = name,
  pattern = "[[:alpha:]]{2,3}\\. "
)
title

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

# Exercise 3.3: logical vector flagging the names that have a second name.
# The check runs on the rearranged names and looks for a single-letter
# initial followed by a period and a space at the very start of the string.
second_name <- str_detect(
  string = name_rear,
  pattern = "^([[:alpha:]]\\. )"
)
second_name

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

# Exercise 4.1: "[0-9]+\\$" matches one or more digits that are immediately
# followed by a literal dollar sign.
sample <- "This is 7645$"
sample %>%
  str_extract_all(pattern = "[0-9]+\\$")

## [[1]]
## [1] "7645$"

# Exercise 4.2: "\\b[a-z]{1,4}\\b" matches a standalone word made of one to
# four lowercase letters. str_extract_all() returns every such word, so the
# capitalised "This" is skipped while "is", "sane" and "pick" are kept.
sample <- "This is sane pick"
sample %>%
  str_extract_all(pattern = "\\b[a-z]{1,4}\\b")

## [[1]]
## [1] "is"   "sane" "pick"

# Exercise 4.3: ".*?\\.txt$" matches any sequence of characters that ends
# with the literal suffix ".txt" at the end of the string.
sample <- "this is my file.txt"
sample %>%
  str_extract_all(pattern = ".*?\\.txt$")

## [[1]]
## [1] "this is my file.txt"

# Exercise 4.4: "\\d{2}/\\d{2}/\\d{4}" matches two digits, a slash, two
# digits, a slash and four digits -- the shape of a dd/mm/yyyy style date,
# even when it is embedded in a longer run of digits.
sample <- "85928/08/1987943"
sample %>%
  str_extract_all(pattern = "\\d{2}/\\d{2}/\\d{4}")

## [[1]]
## [1] "28/08/1987"

# Exercise 4.5: "<(.+?)>.+?</\\1>" matches an opening tag, the content after
# it, and the closing tag whose name equals the captured opening tag name
# (enforced through the \\1 back-reference).
sample <- "this <p>an extraction </p>"
sample %>%
  str_extract_all(pattern = "<(.+?)>.+?</\\1>")

## [[1]]
## [1] "<p>an extraction </p>"

# Exercise 9: the following code hides a secret message. Crack it with R and
# regular expressions. Hint: some of the characters are more revealing than
# others! The code snippet is also available in the materials at
# www.r-datacollection.com.
data <- 'clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr'

# The uppercase letters spell the message. The POSIX class is written in its
# bracketed form, [[:upper:]], which is how character classes are normally
# embedded in a regular expression.
secret_messege <- str_extract_all(data, "[[:upper:]]")
secret_messege

# The code also contains "." between the words and a closing "!". Keeping
# those characters and turning each "." into a space yields a readable
# sentence instead of one unbroken run of letters.
revealing_chars <- str_extract_all(data, "[[:upper:].!]")[[1]]
secret_sentence <- str_replace_all(
  str_c(revealing_chars, collapse = ""),
  fixed("."),
  " "
)
secret_sentence

## [[1]]
##  [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"