Copy the introductory example. The vector `name` stores the extracted names.
#R> name
#[1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
#[4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# Load stringr and keep the raw phone-book text from the exercise in a single
# string.
library(stringr)
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
print(raw.data)
## [1] "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson, Homer5553642Dr. Julius Hibbert"
# A name is a run of characters taken from letters, periods, commas and
# spaces. The {2,} quantifier requires at least two such characters in a row,
# so an isolated match (e.g. the single space inside a phone number) is not
# returned. str_extract_all() yields a list, which unlist() flattens into a
# character vector.
names <- unlist(
  str_extract_all(string = raw.data, pattern = "[[:alpha:]., ]{2,}")
)
print(names)
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
## [1] FALSE
## Loading required package: devtools
## Loading required package: usethis
# Wrap the extracted names in a one-column data frame (the column is called
# `names`) and display it as a kable table with bootstrap styling inside a
# fixed-height scroll box.
library(kableExtra)
names_df <- as.data.frame(names)
names_df %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "300px")
names |
---|
Moe Szyslak |
Burns, C. Montgomery |
Rev. Timothy Lovejoy |
Ned Flanders |
Simpson, Homer |
Dr. Julius Hibbert |
# Break every name at its first comma into exactly two pieces, returned as a
# two-column character matrix: column 1 holds the text before the comma and
# column 2 the text after it (an empty string when the name has no comma).
splits <- str_split_fixed(names_df[["names"]], pattern = ",", n = 2)
print(splits)
## [,1] [,2]
## [1,] "Moe Szyslak" ""
## [2,] "Burns" " C. Montgomery"
## [3,] "Rev. Timothy Lovejoy" ""
## [4,] "Ned Flanders" ""
## [5,] "Simpson" " Homer"
## [6,] "Dr. Julius Hibbert" ""
# Paste the piece after the comma in front of the piece before it, so that
# "Last, First" entries read "First Last"; names without a comma keep their
# original word order. From here on `names_df` is a character vector, not a
# data frame.
# NOTE(review): column 2 is either empty or begins with a space, so every
# pasted name starts with a space (e.g. " Moe Szyslak"); it is not trimmed here.
names_df <- paste(splits[, 2], splits[, 1])
names_df %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "300px")
x |
---|
Moe Szyslak |
C. Montgomery Burns |
Rev. Timothy Lovejoy |
Ned Flanders |
Homer Simpson |
Dr. Julius Hibbert |
# Flag the names that begin with a title, i.e. two or three letters followed
# by a literal period and a space (such as "Dr. " or "Rev. ").
# The pattern allows, but does not require, whitespace before the title
# (`^\\s*`), so the result is the same whether or not the names carry a
# leading space. The period is matched explicitly with `\\.` rather than
# through a generic non-word run.
has_title <- str_detect(names_df, "^\\s*[A-Za-z]{2,3}\\.\\s")
has_title
## [1] FALSE FALSE TRUE FALSE FALSE TRUE
# Flag the names that contain an abbreviated second name, written as a single
# capital letter followed by a period and a space (e.g. the "C. " in
# "C. Montgomery Burns").
# The leading `\\b` requires the capital to start a word, so the last letter
# of a longer all-caps token ending in a period is not mistaken for an initial.
has_second_name <- str_detect(names_df, "\\b[A-Z]\\.\\s")
has_second_name
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.
The following code hides a secret message. Crack it with R and regular expressions. Hint: Some of the characters are more revealing than others!
clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# The scrambled text from the exercise; the line breaks are part of the string.
secret_msg <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep only the runs of upper-case letters and punctuation marks, flattened
# from the list returned by str_extract_all() into a character vector of
# fragments.
msg_df <- unlist(
  str_extract_all(
    string = secret_msg,
    pattern = "[[:upper:][:punct:]]{1,}"
  )
)
print(msg_df)
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "AT" "I" "O" "N" "S"
## [15] "." "Y" "O" "U" "." "A" "R" "E" "." "A" ".S" "U" "P" "E"
## [29] "R" "N" "E" "R" "D" "!"
# Glue the fragments together into one string with no separator.
msg_df <- str_c(msg_df, collapse = "")
# The periods stand in for the gaps between words: swap each one for a space.
msg <- str_replace_all(
  string = msg_df,
  pattern = "\\.",
  replacement = " "
)
print(msg)
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD!"