# The str_*() helpers used throughout this script come from stringr.
library(stringr)

# Raw text in which names and phone numbers are run together with no delimiter.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Extract every run of two or more letters, periods, commas or spaces.
# The {2,} lower bound skips the single spaces that occur inside the phone
# numbers (e.g. "555 8904"), so only the names are returned.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Step 1: drop the first title ("Rev. ", "Dr. ") or abbreviated middle name
# ("C. ") in each entry: one to three letters followed by a period and a space.
# The class is spelled [A-Za-z] because the range [A-z] also covers the
# punctuation characters that sit between "Z" and "a" in ASCII.
name_1 <- str_replace(name, "[A-Za-z]{1,3}\\. ", "")
name_1
## [1] "Moe Szyslak" "Burns, Montgomery" "Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Julius Hibbert"

# Step 2: turn "Last, First" into "First Last" by swapping the two captured
# words around the comma; entries without a comma are left untouched.
name_2 <- str_replace(name_1, "(\\w+),\\s(\\w+)", "\\2 \\1")
name_2
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"

# Step 3: split each name on spaces and keep the first two pieces.
# vapply() with a character(2) template guarantees a two-row matrix; indexing
# with [1:2] pads a one-word name with NA rather than changing the result shape
# (which is what sapply(..., head, 2) would silently do).
name_parts <- strsplit(name_2, " ", fixed = TRUE)
first_last_name <- data.frame(
  t(vapply(name_parts, function(parts) parts[1:2], character(2)))
)
names(first_last_name) <- c("first", "last")
first_last_name
## first last
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Flag entries that begin with a title: one to three letters at the very start
# of the string, followed by a period and a space (e.g. "Dr. ", "Rev. ").
# [A-Za-z] is used instead of [A-z], whose range also includes the punctuation
# characters between "Z" and "a" in ASCII.
title <- str_detect(name, "^[A-Za-z]{1,3}\\. ")
cbind(name, title)
## name title
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "FALSE"
## [3,] "Rev. Timothy Lovejoy" "TRUE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "TRUE"
# Flag entries that contain an abbreviated second name: whitespace, a single
# letter, a period and a space (e.g. " C. " in "Burns, C. Montgomery").
# [A-Za-z] is used instead of [A-z], whose range also includes the punctuation
# characters between "Z" and "a" in ASCII.
sec_name <- str_detect(name, "\\s[A-Za-z]\\. ")
cbind(name, sec_name)
## name sec_name
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "TRUE"
## [3,] "Rev. Timothy Lovejoy" "FALSE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "FALSE"
# Pattern 1: one or more digits immediately followed by a literal dollar sign.
regexp_4_1 <- "[0-9]+\\$"
test_4_1 <- "abs1111$0110100101 "
str_extract_all(string = test_4_1, pattern = regexp_4_1)
## [[1]]
## [1] "1111$"
# Pattern 2: a whole word made of one to four lowercase letters; the \b
# anchors on both sides stop it from matching inside a longer word.
regexp_4_2 <- "\\b[a-z]{1,4}\\b"
test_4_2 <- "cadfc abw"
str_extract_all(string = test_4_2, pattern = regexp_4_2)
## [[1]]
## [1] "abw"
# Pattern 3: any string that ends in the literal extension ".txt".
regexp_4_3 <- ".*?\\.txt$"
# The backslash must be escaped: a bare "\t" inside an R string literal is a
# TAB character, which would make the test string ".<TAB>est.txt" rather than
# the relative path .\test.txt.
test_4_3 <- ".\\test.txt"
str_extract_all(test_4_3, regexp_4_3)
## [[1]]
## [1] ".\\test.txt"
# Pattern 4: a date-like token of the form dd/dd/dddd (two digits, slash,
# two digits, slash, four digits).
regexp_4_4 <- "\\d{2}/\\d{2}/\\d{4}"
test_4_4 <- "11/11/1111"
str_extract_all(string = test_4_4, pattern = regexp_4_4)
## [[1]]
## [1] "11/11/1111"
# Pattern 5, described in words before it is defined.
print("<any character> any character </contents of group 1>")
## [1] "<any character> any character </contents of group 1>"

# An opening tag whose name is captured lazily as group 1, some content, and a
# closing tag that must repeat the captured name via the \1 backreference.
regexp_4_5 <- "<(.+?)>.+?</\\1>"
test_4_5 <- "<span>20 flips come up heads</span>"
str_extract_all(string = test_4_5, pattern = regexp_4_5)
## [[1]]
## [1] "<span>20 flips come up heads</span>"