library(stringr)
library(knitr)
# Unstructured phone-book text: names and phone numbers are run together
# with no delimiter between entries.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Names: runs of at least two letters, periods, commas or spaces.
# raw.data is a single string, so the match list has exactly one element.
name <- str_extract_all(raw.data, "[[:alpha:]., ]{2,}")[[1]]

# Phone numbers: an optional 3-digit area code (optionally parenthesised),
# then 3 digits and 4 digits, each optionally preceded by "-" or " ".
phone <- str_extract_all(raw.data, "\\(?(\\d{3})?\\)?(-| )?\\d{3}(-| )?\\d{4}")[[1]]

print(name)
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Render the extracted names next to their phone numbers as a table.
kable(data.frame(name = name, phone = phone))
name | phone |
---|---|
Moe Szyslak | 555-1239 |
Burns, C. Montgomery | (636) 555-0113 |
Rev. Timothy Lovejoy | 555-6542 |
Ned Flanders | 555 8904 |
Simpson, Homer | 636-555-3226 |
Dr. Julius Hibbert | 5553642 |
# Rewrite "Last, First" entries as "First Last".
#
# The pattern captures the text before the first comma and the text after it,
# discarding whitespace around both parts, so the result has no leading space
# (splitting on "," alone would leave the space that follows the comma).
# Entries without a comma do not match and are returned unchanged; the call is
# vectorised, so an empty `name` vector is handled as well.
name <- str_replace(name, "^\\s*([^,]+?)\\s*,\\s*(.+?)\\s*$", "\\2 \\1")

kable(data.frame(name))
name |
---|
Moe Szyslak |
C. Montgomery Burns |
Rev. Timothy Lovejoy |
Ned Flanders |
Homer Simpson |
Dr. Julius Hibbert |
# Detect and extract an honorific ("Rev." or "Dr.") in each name.
#
# The period is escaped so it only matches a literal "."; unescaped, "Dr."
# would also match names such as "Drew". The word boundary keeps the match
# from starting in the middle of another word.
title_pattern <- "\\b(?:Rev|Dr)\\."

# TRUE where the name carries one of the recognised titles.
with_title <- str_detect(name, title_pattern)

# The matched title, or a real missing value (NA) when there is none.
title <- str_extract(name, title_pattern)

kable(data.frame(name, with_title, title))
name | with_title | title |
---|---|---|
Moe Szyslak | FALSE | NA |
C. Montgomery Burns | FALSE | NA |
Rev. Timothy Lovejoy | TRUE | Rev. |
Ned Flanders | FALSE | NA |
Homer Simpson | FALSE | NA |
Dr. Julius Hibbert | TRUE | Dr. |
# Flag names that contain a middle name (or middle initial).
#
# Strip any leading whitespace and an optional leading title first. A negated
# character class such as "[^Dr\\. |Rev\\. ]" excludes individual characters
# rather than the words "Dr." / "Rev.", so the title is removed explicitly.
# str_replace() always returns one value per input, keeping the result aligned
# with `name`.
untitled <- str_replace(name, "^\\s*(?:(?:Rev|Dr)\\.\\s*)?", "")

# Two or more whitespace gaps means at least three name parts, i.e. something
# sits between the first and the last name.
middle_name <- ifelse(str_count(untitled, "\\s+") >= 2, "yes", "no")

kable(data.frame(name, middle_name))
name | middle_name |
---|---|
Moe Szyslak | no |
C. Montgomery Burns | yes |
Rev. Timothy Lovejoy | no |
Ned Flanders | no |
Homer Simpson | no |
Dr. Julius Hibbert | no |
# Sample strings used to demonstrate the regular expressions below.
v <- c(
  "abc$123",
  "$123",
  "123$",
  "1$23",
  "data*",
  "tournamentinfo.txt",
  "science",
  "MSDS",
  "CUNY",
  "Texas",
  "Austin.",
  "<title> Data Science </title>",
  "txt.tournamentinfo",
  "04/24/1915",
  "12/12/12",
  "<table> List of dates <table>"
)
[0-9]+\\$, one or more digits immediately followed by a literal dollar sign
# Flatten the per-element matches of "digits then a literal $" into one vector.
print(unlist(str_extract_all(string = v, pattern = "[0-9]+\\$")))
## [1] "123$" "1$"
\\b[a-z]{1,4}\\b, a standalone run of 1 to 4 lowercase letters, with a word boundary on each side
# Flatten the per-element matches of 1-4 lowercase letters between word
# boundaries into one vector.
print(unlist(str_extract_all(string = v, pattern = "\\b[a-z]{1,4}\\b")))
## [1] "abc" "data" "txt" "txt" "of"
.*?\\.txt$, a string of any characters (possibly none) that ends with the literal suffix .txt
# Flatten the per-element matches of strings ending in a literal ".txt" into
# one vector.
print(unlist(str_extract_all(string = v, pattern = ".*?\\.txt$")))
## [1] "tournamentinfo.txt"
\\d{2}/\\d{2}/\\d{4}, two digits, a slash, two digits, another slash, and then four digits
(e.g. a numeric date such as 04/24/1915)
# Flatten the per-element matches of the dd/dd/dddd digit pattern into one
# vector.
print(unlist(str_extract_all(string = v, pattern = "\\d{2}/\\d{2}/\\d{4}")))
## [1] "04/24/1915"
<(.+?)>.+?</\\1>, an opening tag in angle brackets, then at least one character of content, then a closing tag
(with a forward slash) whose name must repeat the opening tag's name via the \\1 backreference (e.g. HTML markup): <values> anything </values>
# Flatten the per-element matches of "<tag>...</tag>" pairs (the closing tag
# must repeat the captured opening tag) into one vector.
print(unlist(str_extract_all(string = v, pattern = "<(.+?)>.+?</\\1>")))
## [1] "<title> Data Science </title>"
# Scrambled text that mixes uppercase letters, lowercase letters and digits.
hidden <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Split the text by character family. Inside a bracket expression "." and "?"
# are literal characters, not "any character" / "optional", so each class is
# written flat to make it obvious that it matches one character at a time:
# a letter or digit of the given family, a period, or a question mark.
# NOTE(review): the "!" in `hidden` is matched by none of these classes, so it
# is dropped from all three results - confirm that is intended.
uppercase <- paste(unlist(str_extract_all(hidden, "[A-Z.?]")), collapse = "")
lowercase <- paste(unlist(str_extract_all(hidden, "[a-z.?]")), collapse = "")
numbers <- paste(unlist(str_extract_all(hidden, "[0-9.?]")), collapse = "")

uppercase
## [1] "CONGRATULATIONS.YOU.ARE.A.SUPERNERD"
# Show the concatenated lowercase-class matches.
print(lowercase)
## [1] "clcopowzmstcdwnkigvdicpuggvhrynjuwczihqrfpxsjdwpnanwowisdijjkpfdrcocbtyczjataootjtjnecfek.rwwwojigdvrfrbz.bknbhzgvizcrop.wgnb.qofaotfbwmktszqefyndtkcfgmcgxonhkgr"
# Show the concatenated digit-class matches.
print(numbers)
## [1] "1087792855078035307553364.1162.24905..651724639589659490545"
# Turn every literal period in the uppercase string into a space, then show
# the result as a one-column table.
uppercase <- str_replace_all(
  string = uppercase,
  pattern = "[.]",
  replacement = " "
)
kable(data.frame(uppercase = uppercase))
uppercase |
---|
CONGRATULATIONS YOU ARE A SUPERNERD |