3-1
#3-1
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.1
# Raw phone-book string: names and phone numbers run together without
# delimiters. The literal spans two lines, so it contains one newline.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555
-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# A name is any run of two or more letters, periods, commas or spaces;
# the digits, hyphens and parentheses of the phone numbers end each run.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Entries containing a comma are written "Last, First": rebuild them from
# their last word followed by their first word. Other entries pass through.
name1 <- ifelse(
  grepl(",", name, fixed = TRUE),
  paste(word(name, -1), word(name, 1)),
  name
)

# Remove the titles and the comma left over from the swap. The periods are
# escaped so that only a literal "Rev." or "Dr." is removed; an unescaped
# "." matches any character and would also strip the start of e.g. "Drew".
name2 <- gsub("Rev\\.|Dr\\.|,", "", name1)
# Removing a leading title leaves a leading space behind; trim it.
name2 <- trimws(name2)
name2
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
3-2
# 3-2: flag which of the extracted names carry a title ("Rev." or "Dr.").
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# The periods are escaped so the pattern matches the literal titles only;
# an unescaped "." would match any character after "Rev" or "Dr".
name_title_detect <- str_detect(name, "Rev\\.|Dr\\.")

# Build the data frame from the two vectors directly. Going through cbind()
# first would coerce both to a character matrix and turn the logical flags
# into strings.
name_title <- data.frame(name, name_title_detect)
name_title
## name name_title_detect
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert TRUE
3-3
# 3-3: logical vector, one element per cleaned name in name2, that is TRUE
# when the name contains a space (i.e. consists of more than one word).
# NOTE(review): this is TRUE for every entry of name2; if the exercise is
# asking about a *middle* name, the check should probably run against the
# original `name` vector instead -- confirm the intended meaning.
grepl(" ", name2, fixed = TRUE)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
4-1
# 4-1: pattern [0-9]+\\$
# Matches one or more digits immediately followed by a literal dollar sign
# (the backslash stops "$" from acting as the end-of-string anchor).
a <- c("1$", "$2", "33.345$", "35555553$")

# Indices of the elements that contain a match.
grep(pattern = "[0-9]+\\$", x = a)
## [1] 1 3 4

# The matched text itself: "$2" has no digit before the "$", and in
# "33.345$" only the digits after the period are adjacent to the "$".
str_extract(string = a, pattern = "[0-9]+\\$")
## [1] "1$" NA "345$" "35555553$"
4-2
# 4-2: pattern \\b[a-z]{1,4}\\b
# Matches a run of one to four lowercase letters (a to z) that has a word
# boundary on both sides, i.e. a whole lowercase word of at most 4 letters.
a <- c(
  "ea", "f", "ea1", "abcd",
  "abc", "Aade", "aaaaaaaaaa"
)

# Indices of the elements that contain a match.
grep(pattern = "\\b[a-z]{1,4}\\b", x = a)
## [1] 1 2 4 5

# "ea1" and "Aade" fail because a digit or an uppercase letter touches the
# lowercase run (no boundary there); "aaaaaaaaaa" is longer than 4 letters.
str_extract(string = a, pattern = "\\b[a-z]{1,4}\\b")
## [1] "ea" "f" NA "abcd" "abc" NA NA
4-3
# 4-3: pattern .*?\\.txt$
# Matches any sequence of characters (possibly empty, matched lazily)
# followed by a literal ".txt" at the very end of the string.
a <- c(
  "a.txt", "a.txt$", "atxt",
  "1.txt", ".txt", ".d.txt"
)

# Indices of the elements that contain a match.
grep(pattern = ".*?\\.txt$", x = a)
## [1] 1 4 5 6

# "a.txt$" does not end in ".txt" (a literal "$" follows it) and "atxt"
# has no period before "txt", so neither matches.
str_extract(string = a, pattern = ".*?\\.txt$")
## [1] "a.txt" NA NA "1.txt" ".txt" ".d.txt"
4-4
# 4-4: pattern \\d{2}/\\d{2}/\\d{4}
# Matches two digits, a slash, two digits, a slash, then four digits --
# a date-like string such as dd/mm/yyyy. The pattern is not anchored, so
# it looks for this shape anywhere inside each element.
a <- c("01/10/2017", "01/11/222", "11-11-1111")

# Indices of the elements that contain a match.
grep(pattern = "\\d{2}/\\d{2}/\\d{4}", x = a)
## [1] 1

# "01/11/222" has only three digits in the last group and "11-11-1111"
# uses hyphens instead of slashes.
str_extract(string = a, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] "01/10/2017" NA NA
4-5
# 4-5: pattern <(.+?)>.+?</\\1>
# Matches an opening tag <name>, at least one character of content, and a
# closing tag </name> whose name repeats the opening one: the parentheses
# capture the tag name and \\1 refers back to that captured text.
a <- c(
  "<html>de35</html>",
  "<html>?<html>",
  "<d></d>",
  "<d>ggds</e>",
  "<44>?</44>"
)

# Indices of the elements that contain a match.
grep(pattern = "<(.+?)>.+?</\\1>", x = a)
## [1] 1 5

# Element 2 has no closing tag, element 3 has no content between the tags,
# and element 4 closes with a different tag name.
str_extract(string = a, pattern = "<(.+?)>.+?</\\1>")
## [1] "<html>de35</html>" NA NA
## [4] NA "<44>?</44>"
9
# 9: recover the hidden message by pulling every uppercase letter out of
# the scrambled string (the literal spans four lines, so it also contains
# newline characters; they are not uppercase and are ignored).
raw.data <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# The POSIX class is written in its full double-bracket form: the inner
# [:upper:] is the class name and the outer [] is the bracket expression.
# This matches the [[:alpha:]., ] class used for the name extraction and
# stays correct if the pattern is reused with base R regex functions, where
# a bare "[:upper:]" would be read as the literal characters : u p e r.
a <- unlist(str_extract_all(raw.data, "[[:upper:]]"))
a
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "Y" "O"
## [18] "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E" "R" "D"