Loading packages
I am loading the stringr package.
Load data
# Copy the introductory example.
# NOTE: the literal is reproduced exactly as recorded, including the line
# break inside it (between "555" and "-6542").
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555
-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"

# We are calling the str_extract_all function from the stringr package. It is
# defined as str_extract_all(string, pattern): the first argument is the string
# to operate on and the second is the expression we are looking for.
# str_extract_all will extract every match.

# Extract the names and store them in a vector called "name".
# The pattern keeps runs of two or more letters, periods, commas and spaces.
name <- unlist(str_extract_all(raw.data, "[[:alpha:]., ]{2,}"))
name
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
### create a function to extract last names
# Takes a character vector of names (`list`) and returns, for each element,
# the last name found by two successive str_extract() passes.
get_last <- function(list) {
  # Pass 1: either letters followed by a comma ("Last, First" form), or a
  # space plus at least two letters that begins at a word boundary.
  surname_chunk <- '[[:alpha:]]{1,}\\,|\\b [[:alpha:]]{2,}'
  # Pass 2: keep only the first run of letters, dropping the comma or space.
  letters_only <- "[[:alpha:]]{1,}"
  str_extract(
    string = str_extract(string = list, pattern = surname_chunk),
    pattern = letters_only
  )
}
### create a function to extract first names
# Takes a character vector of names (`list`) and returns, for each element,
# the first name found by two successive str_extract() passes.
get_first <- function(list) {
  # Pass 1, three alternatives tried at each position:
  #   letters followed by a space          ("First Last")
  #   a period, a space, then letters      ("Title. First ..." / "C. First")
  #   a comma, a space, then 2+ letters    ("Last, First")
  given_chunk <- '[[:alpha:]]{1,} |\\. [[:alpha:]]{1,}|\\, [[:alpha:]]{2,}'
  # Pass 2: keep only the first run of letters from that chunk.
  letters_only <- "[[:alpha:]]{1,}"
  str_extract(
    string = str_extract(string = list, pattern = given_chunk),
    pattern = letters_only
  )
}
# run functions to create a dataframe called "namedf"
# Build "namedf": one row per entry of `name`, holding the first and last
# names returned by get_first() and get_last().
namedf <- data.frame(
  first = get_first(name),
  last = get_last(name)
)
namedf
# NOTE(review): the recorded output below is a character vector followed by a
# logical vector, not a printed data frame; presumably it belongs to steps
# that are not shown in this listing -- confirm against the original document.
## [1] "Moe Szyslak" "Montgomery Burns" "Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Julius Hibbert"
## [1] FALSE FALSE TRUE FALSE FALSE TRUE

# Create a logical vector describing whether a name has a second name,
# detected as: space, a single letter, a period, space.
# The letter class uses the bracketed form [[:alpha:]], matching the other
# patterns in this script; the bare [:alpha:] form is only understood by
# stringr's ICU engine.
secondname <- str_detect(name, " [[:alpha:]]\\. ")
secondname
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
# test: repeat the whole procedure on a second raw string
raw.datatest <- "333-6589Awsaf Akbar(502) 888-0253Md. Forhad Akbar15165-54654-32546Shamzida Sharmin525"

# Extract the names and store them in a vector called "nametest"
nametest <- unlist(str_extract_all(raw.datatest, "[[:alpha:]., ]{2,}"))
nametest
## [1] "Awsaf Akbar" "Md. Forhad Akbar" "Shamzida Sharmin"

# Run the name functions to create namedftest
namedftest <- data.frame(
  first = get_first(nametest),
  last = get_last(nametest)
)
namedftest
## [1] "Awsaf Akbar" "Forhad Akbar" "Shamzida Sharmin"

# Create a logical vector describing whether a name carries the title "Md."
# The period is escaped: an unescaped "." would match any character, so
# strings such as "Mdx" would wrongly count as a title.
nametitletest <- str_detect(nametest, "Md\\.")
nametitletest
## [1] FALSE TRUE FALSE

# Create a logical vector describing whether a name has a second name,
# detected as: space, a single letter, a period, space.
# The letter class uses the bracketed form [[:alpha:]], matching the
# extraction pattern above.
secondnametest <- str_detect(nametest, " [[:alpha:]]\\. ")
secondnametest
## [1] FALSE FALSE FALSE
I will try to explain each regular expression in detail and come up with at least two different examples for each.
Store the pattern in a variable. Then create two different example strings and test them.
# Pattern (a): one or more digits immediately followed by a literal "$".
pattern_a <- "[0-9]+\\$"

# First example: a sentence containing a single amount.
a1 <- "This is a example string: 120$."
# Pull out every match to check the explanation against the result.
example1 <- unlist(str_extract_all(string = a1, pattern = pattern_a))
example1
## [1] "120$"
# NOTE(review): the TRUE below presumably comes from a detection step that is
# not shown in this listing -- confirm.
## [1] TRUE

# Second example: several "$" signs, only some of them preceded by digits.
a2 <- "240$a12$.5.7$a$"
# Pull out every match to check the explanation against the result.
example2 <- unlist(str_extract_all(string = a2, pattern = pattern_a))
example2
## [1] "240$" "12$" "7$"
## [1] TRUE
Create two different example strings and test them
# Pattern (b): a run of one to four lowercase letters a-z with a word
# boundary on each side.
pattern_b <- "\\b[a-z]{1,4}\\b"

# First example: an ordinary sentence.
b1 <- "I am doing Data 607 assignmnment 3."
# Pull out every match to check the explanation against the result.
example3 <- unlist(str_extract_all(string = b1, pattern = pattern_b))
example3
## [1] "am"
# NOTE(review): the TRUE below presumably comes from a detection step that is
# not shown in this listing -- confirm.
## [1] TRUE

# Second example: hyphen-separated words of varying length and case.
b2 <- "607-Crown-jewel-data-expression-six-zero-seven-etc$"
# Pull out every match to check the explanation against the result.
example4 <- unlist(str_extract_all(string = b2, pattern = pattern_b))
example4
## [1] "data" "six" "zero" "etc"
## [1] TRUE
Create two different example strings and test them
# Pattern (c): any characters (matched lazily) followed by a literal ".txt"
# at the end of the string.
pattern_c <- ".*?\\.txt$"

# First example: one string that ends in ".txt".
c1 <- "5454#34_2.txt option.png.image dark.txt"
# Pull out every match to check the explanation against the result.
example5 <- unlist(str_extract_all(string = c1, pattern = pattern_c))
example5
## [1] "5454#34_2.txt option.png.image dark.txt"
# NOTE(review): the TRUE below presumably comes from a detection step that is
# not shown in this listing -- confirm.
## [1] TRUE

# Second example: a vector of candidate file names.
c2 <- c(".txt", "move.text", "data.txt", "1$g!1.txt")
# Pull out every match to check the explanation against the result.
example6 <- unlist(str_extract_all(string = c2, pattern = pattern_c))
example6
## [1] ".txt" "data.txt" "1$g!1.txt"
## [1] TRUE
Create two different example strings and test them
# Pattern (d): two digits, "/", two digits, "/", four digits.
pattern_d <- "\\d{2}/\\d{2}/\\d{4}"

# First example: a vector of date-like strings with varying digit counts.
d1 <- c(
  "2/15/2019", "04/12/2019", "26/03/1985",
  "1/1/1986", "34/99/0005", "2/3/358"
)
# Pull out every match to check the explanation against the result.
example7 <- unlist(str_extract_all(string = d1, pattern = pattern_d))
example7
## [1] "04/12/2019" "26/03/1985" "34/99/0005"
# NOTE(review): the logical vector below presumably comes from a detection
# step that is not shown in this listing -- confirm.
## [1] FALSE TRUE TRUE FALSE TRUE FALSE

# Second example: several dates inside a single string.
d2 <- "09/12/2019 09/12/2016 2014/01/04"
# Pull out every match to check the explanation against the result.
example8 <- unlist(str_extract_all(string = d2, pattern = pattern_d))
example8
## [1] "09/12/2019" "09/12/2016"
## [1] TRUE
<text> and ends with </text>. This would be a good way to search through HTML or XML. Create two different example strings and test them.
# Pattern (e): "<...>", some content, then "</...>" where the closing part
# must repeat exactly what capture group 1 matched inside the opening "<>".
pattern_e <- "<(.+?)>.+?</\\1>"

# First example: one matching tag pair and one whose closing text differs.
e1 <- c(
  "<tag>Text</tag>",
  "<Font size=4,color=black>Black Text</Font size=4,color=blue>"
)
# Pull out every match to check the explanation against the result.
example9 <- unlist(str_extract_all(string = e1, pattern = pattern_e))
example9
## [1] "<tag>Text</tag>"
# NOTE(review): the logical vector below presumably comes from a detection
# step that is not shown in this listing -- confirm.
## [1] TRUE FALSE

# Second example: sibling and nested elements in a single string.
e2 <- "<div>hello world</div> <ol><li>one</li><li>two</li></ol>"
# Pull out every match to check the explanation against the result.
example10 <- unlist(str_extract_all(string = e2, pattern = pattern_e))
example10
## [1] "<div>hello world</div>" "<ol><li>one</li><li>two</li></ol>"
## [1] TRUE
clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# Store the coded message in a variable called "secret"
secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"

# Drop all digits and lowercase letters by keeping only the characters that
# are uppercase letters or punctuation.
pattern <- "[[:upper:]]|[[:punct:]]"
cracked <- unlist(str_extract_all(string = secret, pattern = pattern))
cracked
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" "." "Y"
## [18] "O" "U" "." "A" "R" "E" "." "A" "." "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
# NOTE(review): the remaining output (periods shown as spaces, then the
# characters joined into one string) presumably comes from later steps that
# are not shown in this listing -- confirm.
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "A" "T" "I" "O" "N" "S" " " "Y"
## [18] "O" "U" " " "A" "R" "E" " " "A" " " "S" "U" "P" "E" "R" "N" "E" "R"
## [35] "D" "!"
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"