Question 3

library(stringr)
library(knitr)

# Source text: names and phone numbers run together with no delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# raw.data is a single string, so the one-element list returned by
# str_extract_all() is unwrapped directly with [[1]].
# Names: runs of at least two letters, periods, commas, or spaces.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]
# Phones: optional "(", optional 3-digit area code, optional ")", optional
# hyphen or space, 3 digits, optional hyphen or space, 4 digits.
phone <- str_extract_all(
  raw.data,
  "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}"
)[[1]]

# Print the extracted names to check the pattern.
name
## [1] "Moe Szyslak"          "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Simpson, Homer"       "Dr. Julius Hibbert"

3.1)

# Tabulate names beside phone numbers; the two vectors are paired by position.
kable(data.frame(name,phone))
name phone
Moe Szyslak 555-1239
Burns, C. Montgomery (636) 555-0113
Rev. Timothy Lovejoy 555-6542
Ned Flanders 555 8904
Simpson, Homer 636-555-3226
Dr. Julius Hibbert 5553642
  • first_name last_name format:
# Rewrite "Last, First" entries as "First Last". Entries without a comma do
# not match the pattern and are returned unchanged. The whitespace after the
# comma is consumed by the pattern, so the reordered name does not start with
# a blank. The call is vectorised and is safe on an empty `name` vector.
name <- str_replace(name, "^([^,]+),\\s*(.+)$", "\\2 \\1")
kable(data.frame(name))
name
Moe Szyslak
C. Montgomery Burns
Rev. Timothy Lovejoy
Ned Flanders
Homer Simpson
Dr. Julius Hibbert

3.2)

# Pull out the honorific ("Rev." or "Dr.") where a name has one.
# The period is escaped so it matches only a literal ".", and the leading
# word boundary keeps the pattern from firing in the middle of a longer word.
# str_extract() returns NA for names without a title, so `title` holds a real
# missing value rather than the string "NA".
title <- str_extract(name, "\\b(Rev|Dr)\\.")
# A name carries a title exactly when something was extracted.
with_title <- !is.na(title)
kable(data.frame(name, with_title, title))
name with_title title
Moe Szyslak FALSE NA
C. Montgomery Burns FALSE NA
Rev. Timothy Lovejoy TRUE Rev.
Ned Flanders FALSE NA
Homer Simpson FALSE NA
Dr. Julius Hibbert TRUE Dr.

3.3)

# Remove a leading title (and any stray surrounding whitespace) so that only
# the personal name is left. The result stays aligned with `name` because
# both calls are vectorised and return one value per input element.
bare_name <- str_trim(str_replace(name, "^\\s*(Rev|Dr)\\.\\s*", ""))
# Exactly two gaps between parts is treated as first + middle + last.
middle_name <- ifelse(str_count(bare_name, "\\s+") == 2, "yes", "no")
kable(data.frame(name, middle_name))
name middle_name
Moe Szyslak no
C. Montgomery Burns yes
Rev. Timothy Lovejoy no
Ned Flanders no
Homer Simpson no
Dr. Julius Hibbert no

Question 4

# Sample strings used to exercise the Question 4 regular expressions.
v <- c(
  "abc$123", "$123", "123$", "1$23", "data*", "tournamentinfo.txt",
  "science", "MSDS", "CUNY", "Texas", "Austin.",
  "<title> Data Science </title>", "txt.tournamentinfo", "04/24/1915",
  "12/12/12", "<table> List of dates <table>"
)

4.1)

[0-9]+\\$, string of digit(s) followed by a dollar sign

# Collect every digits-then-dollar-sign match across the test strings.
unlist(str_extract_all(string = v, pattern = "[0-9]+\\$"))
## [1] "123$" "1$"

4.2)

\\b[a-z]{1,4}\\b, a whole word made up of 1 to 4 lowercase letters; the \\b word boundaries keep it from matching part of a longer word

# Collect every standalone lowercase word of one to four letters.
unlist(str_extract_all(string = v, pattern = "\\b[a-z]{1,4}\\b"))
## [1] "abc"  "data" "txt"  "txt"  "of"

4.3)

.*?\\.txt$, any string (with any characters, or none, in front) that ends with the literal text .txt

# Collect the strings that end in ".txt".
unlist(str_extract_all(string = v, pattern = ".*?\\.txt$"))
## [1] "tournamentinfo.txt"

4.4)

\\d{2}/\\d{2}/\\d{4}, two digits, a slash, two more digits, another slash, and then four digits (e.g. a numeric date such as a birthdate)

# Collect every two-digit/two-digit/four-digit date-like match.
unlist(str_extract_all(string = v, pattern = "\\d{2}/\\d{2}/\\d{4}"))
## [1] "04/24/1915"

4.5)

<(.+?)>.+?</\\1>, an opening tag in angle brackets, then at least one character of content, then a closing tag with a forward slash whose name must repeat the opening tag's name (the \\1 backreference), as in HTML markup: <values> anything </values>

# Collect every span enclosed by a matching opening and closing tag.
unlist(str_extract_all(string = v, pattern = "<(.+?)>.+?</\\1>"))
## [1] "<title> Data Science </title>"

Question 9

hidden<-"clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Split the scrambled text by character type. Each pattern is one plain
# character class: a single character type plus the literal period, which is
# kept in all three results. The matches are glued back together in order.
uppercase <- paste(unlist(str_extract_all(hidden, "[A-Z.]")), collapse = "")
lowercase <- paste(unlist(str_extract_all(hidden, "[a-z.]")), collapse = "")
numbers <- paste(unlist(str_extract_all(hidden, "[0-9.]")), collapse = "")

# Print the string assembled from the uppercase-pattern matches.
uppercase
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD"
# Print the string assembled from the lowercase-pattern matches.
lowercase
## [1] "clcopowzmstcdwnkigvdicpuggvhrynjuwczihqrfpxsjdwpnanwowisdijjkpfdrcocbtyczjataootjtjnecfek.rwwwojigdvrfrbz.bknbhzgvizcrop.wgnb.qofaotfbwmktszqefyndtkcfgmcgxonhkgr"
# Print the string assembled from the digit-pattern matches.
numbers
## [1] "1087792855078035307553364.1162.24905..651724639589659490545"
  • The meaningful sentence is the following:
# Swap the period separators for spaces so the message reads as a sentence,
# then show it as a one-column table.
uppercase <- str_replace_all(
  string = uppercase,
  pattern = "[.]",
  replacement = " "
)
kable(data.frame(uppercase = uppercase))
uppercase
CONGRATULATIONS YOU ARE A SUPERNERD