# stringr provides the str_* helpers used by this script.
library(stringr)

# Raw text in which phone numbers and names are run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# Extract every run of at least two characters drawn from letters, ".", ","
# and space; digits, "-" and parentheses therefore act as separators.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Remove the first run of one to three letters that is followed by ". "
# (a title such as "Rev. " or an initial such as "C. ").
# [[:alpha:]] is used rather than [A-z]: the A-z range also covers the
# punctuation characters that sit between "Z" and "a" ([ \ ] ^ _ `).
name_1 <- str_replace(name, "[[:alpha:]]{1,3}\\. ", "")
name_1
## [1] "Moe Szyslak" "Burns, Montgomery" "Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Julius Hibbert"

# Rewrite "Last, First" as "First Last"; names without a comma are unchanged.
name_2 <- str_replace(name_1, "(\\w+),\\s(\\w+)", "\\2 \\1")
name_2
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"

# Split each name on single spaces and keep its first two tokens.
# vapply() pins the per-name result to exactly two strings, so a name with a
# single token raises an error instead of silently changing the result shape
# the way sapply() would.
first_last_name <- data.frame(
  t(
    vapply(
      strsplit(name_2, " "),
      function(tokens) head(tokens, 2),
      character(2)
    )
  )
)
names(first_last_name) <- c("first", "last")
first_last_name
## first last
## 1 Moe Szyslak
## 2 Montgomery Burns
## 3 Timothy Lovejoy
## 4 Ned Flanders
## 5 Homer Simpson
## 6 Julius Hibbert
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"

# Flag names that begin with a title: one to three letters anchored at the
# start of the string and followed by ". ".
# [[:alpha:]] replaces [A-z], whose range also matches [ \ ] ^ _ and `.
title <- str_detect(name, "^[[:alpha:]]{1,3}\\. ")
cbind(name, title)
## name title
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "FALSE"
## [3,] "Rev. Timothy Lovejoy" "TRUE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "TRUE"
# Flag names that contain a second-name initial: whitespace, a single letter,
# then ". ".
# [[:alpha:]] replaces [A-z], whose range also matches [ \ ] ^ _ and `.
sec_name <- str_detect(name, "\\s[[:alpha:]]\\. ")
cbind(name, sec_name)
## name sec_name
## [1,] "Moe Szyslak" "FALSE"
## [2,] "Burns, C. Montgomery" "TRUE"
## [3,] "Rev. Timothy Lovejoy" "FALSE"
## [4,] "Ned Flanders" "FALSE"
## [5,] "Simpson, Homer" "FALSE"
## [6,] "Dr. Julius Hibbert" "FALSE"
# Example 4.1: one or more digits immediately followed by a literal "$".
test_4_1 <- "abs1111$0110100101 "
regexp_4_1 <- "[0-9]+\\$"
str_extract_all(string = test_4_1, pattern = regexp_4_1)
## [[1]]
## [1] "1111$"
# Example 4.2: whole words (delimited by word boundaries) made of one to four
# lowercase letters; the five-letter word in the sample is skipped.
test_4_2 <- "cadfc abw"
regexp_4_2 <- "\\b[a-z]{1,4}\\b"
str_extract_all(string = test_4_2, pattern = regexp_4_2)
## [[1]]
## [1] "abw"
# Example 4.3: any (lazily matched) prefix followed by a literal ".txt" at the
# end of the string.
regexp_4_3 <- ".*?\\.txt$"
# The backslash is escaped so the sample is the path .\test.txt; an unescaped
# "\t" inside an R string literal is a TAB character, not backslash + "t".
test_4_3 <- ".\\test.txt"
str_extract_all(test_4_3, regexp_4_3)
## [[1]]
## [1] ".\\test.txt"
# Example 4.4: two digits, "/", two digits, "/", four digits.
test_4_4 <- "11/11/1111"
regexp_4_4 <- "\\d{2}/\\d{2}/\\d{4}"
str_extract_all(string = test_4_4, pattern = regexp_4_4)
## [[1]]
## [1] "11/11/1111"
# Example 4.5: plain-language description of the pattern, printed first.
print("<any character> any character </contents of group 1>")
## [1] "<any character> any character </contents of group 1>"

# An opening tag whose name is captured in group 1, some content, and a
# closing tag that repeats the captured name via the \\1 backreference.
test_4_5 <- "<span>20 flips come up heads</span>"
regexp_4_5 <- "<(.+?)>.+?</\\1>"
str_extract_all(string = test_4_5, pattern = regexp_4_5)
## [[1]]
## [1] "<span>20 flips come up heads</span>"
# Exercise 9: the text below is scanned for runs of consecutive uppercase
# letters; each run is returned as one element of the result.
regexp_9 <- "[A-Z]+"
str_9 <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
str_extract_all(string = str_9, pattern = regexp_9)
## [[1]]
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "AT" "I" "O" "N" "S"
## [15] "Y" "O" "U" "A" "R" "E" "A" "S" "U" "P" "E" "R" "N" "E"
## [29] "R" "D"