HW3_DATA607

# Load stringr, which provides every str_* helper used in this script.
library(stringr)

# Raw text in which names and phone numbers run together without delimiters.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

#page 206 Automated data collection with R
# A name is any run of two or more letters, periods, commas or spaces.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))

# First name, pass 1: either the trailing word when it is preceded by
# punctuation and a space ("Last, First" / "C. Montgomery"), or a word of two
# or more letters that is followed by a space ("First Last").
firstName <- unlist(str_extract_all(
  name,
  "[[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,} "
))
# Pass 2: strip the surrounding punctuation and spaces, keeping the word only.
firstName <- unlist(str_extract_all(firstName, "[[:alpha:]]{2,}"))


# Last name, pass 1: either the trailing word when it is NOT preceded by
# punctuation ("First Last"), or a word followed by a comma and a space
# ("Last, First").
lastName <- unlist(str_extract_all(
  name,
  "[^[:punct:]] [[:alpha:]]{2,}$|[[:alpha:]]{2,}, "
))
# Pass 2: keep only the word itself (the single leading letter captured by
# the first alternative is too short to match and is dropped).
lastName <- unlist(str_extract_all(lastName, "[[:alpha:]]{2,}"))

# A title is two or more letters followed by a literal period; a single
# initial such as "C." is therefore not treated as a title.
title <- unlist(str_extract_all(name, "[[:alpha:]]{2,}\\."))
print(title)

## [1] "Rev." "Dr."

Construct a logical vector indicating whether a character has a title

# Regex for an abbreviated title: two or more letters followed by a literal
# period. Matching each name against this single pattern keeps the result
# aligned row-by-row with `name`; matching against the vector of extracted
# titles would instead depend on vector recycling and on the order in which
# the titled names happen to appear.
titlePattern <- "[[:alpha:]]{2,}\\."

# One row per person. Title is NA for people whose name carries no title.
firstLastdf <- data.frame(
  FirstName = firstName,
  LastName = lastName,
  Title = str_extract(name, titlePattern)
)

# TRUE where the name contains a title such as "Rev." or "Dr.".
hasTitledf <- data.frame(
  Name = name,
  HasTitle = str_detect(name, titlePattern)
)

# TRUE where the name contains its own extracted last name (element-wise
# comparison; `name` and `lastName` have the same length).
hasLastNamedf <- data.frame(
  Name = name,
  HasLastName = str_detect(name, lastName)
)
print(hasTitledf)

##                   Name HasTitle
## 1          Moe Szyslak    FALSE
## 2 Burns, C. Montgomery    FALSE
## 3 Rev. Timothy Lovejoy     TRUE
## 4         Ned Flanders    FALSE
## 5       Simpson, Homer    FALSE
## 6   Dr. Julius Hibbert     TRUE

print(hasLastNamedf)

##                   Name HasLastName
## 1          Moe Szyslak        TRUE
## 2 Burns, C. Montgomery        TRUE
## 3 Rev. Timothy Lovejoy        TRUE
## 4         Ned Flanders        TRUE
## 5       Simpson, Homer        TRUE
## 6   Dr. Julius Hibbert        TRUE

Construct a logical vector indicating whether a character has a second name. Notes: \ is an escape character; \d matches digits; \b is a word edge; \> is a word end; \< is a word beginning; ? means the preceding item is optional and will be matched at most once; * means the preceding item is matched zero or more times; + means the preceding item is matched one or more times; {n} means the preceding item is matched exactly n times; {n,m} means the preceding item is matched at least n times, but not more than m times.

# One or more digits followed by a literal dollar sign.
re <- "[0-9]+\\$"
str_detect(string = "13443$", pattern = re)

## [1] TRUE

\b[a-z]{1,4}\b matches a word boundary, then 1 to 4 lowercase letters, then another word boundary, i.e. a standalone lowercase word of at most four letters.

# A word of one to four lowercase letters, delimited by word boundaries.
re <- "\\b[a-z]{1,4}\\b"
str_detect(string = "$abcd$", pattern = re)

## [1] TRUE

.*?\.txt$ lazily matches any characters except line breaks, zero or more times (the ? makes * lazy), followed by an escaped literal dot and the ending txt at the end of the string, so it accepts both sdfnsdnkdsds.txt and .txt

# Anything (lazily), then a literal ".txt" anchored at the end of the string.
re <- ".*?\\.txt$"
str_detect(string = "$abcd.txt", pattern = re)

## [1] TRUE

\d{2}/\d{2}/\d{4} matches two digits, a slash, two digits, a slash, and four digits: [0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9], e.g. 10/12/2005

# Two digits, slash, two digits, slash, four digits.
re <- "\\d{2}/\\d{2}/\\d{4}"
str_detect(string = "10/12/1996", pattern = re)

## [1] TRUE

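<(.+?)>.+?</\1> matches an opening tag, at least one character of content, and a closing tag whose name repeats the captured opening tag name (backreference \1), e.g. <div>w3skewls</div>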

# Opening tag (captured), some content, then a closing tag that repeats the
# captured tag name through the \1 backreference.
re <- "<(.+?)>.+?</\\1>"
str_detect(string = "<div>w3skewls</div>", pattern = re)

## [1] TRUE

# Scrambled text; the hidden message is spelled by its capital letters, with
# periods standing in for the spaces between words.
secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
     Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
     d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
     fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Step 1: pull out every run of upper-case letters and periods.
upperRuns <- unlist(str_extract_all(secret, "[[:upper:].]{1,}"))
# Step 2: glue the runs together into a single string.
joinedRuns <- paste(upperRuns, collapse = "")
# Step 3: turn each period into a space, then print the decoded message.
message <- str_replace_all(joinedRuns, "[.]", " ")
message

## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"

HW3_DATA607

alexander

9/10/2018