Summary of Assignment: This assignment involves character manipulation and regular expressions using R.
This Assignment requires the following:
1). R-Studio
The following R-packages are used: 1.stringr
Steps to reproduce: 1). run the R-Studio file: R_607_Week_3b_Hmk_Char_Manipulation_Daniel_Thonn.Rmd
Setting up and Preparing the Environment
#install.packages("stringr")
library(stringr)
Load the character names data into an R character vector
# Character names to clean up; some are stored as "last_name, first_name".
name <- c(
  "Moe Szyslak", "Burns, C. Montgomery", "Rev. Timothy Lovejoy",
  "Ned Flanders", "Simpson, Homer", "Dr. Julius Hibbert"
)
list(name)
## [[1]]
## [1] "Moe Szyslak" "Burns, C. Montgomery" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Simpson, Homer" "Dr. Julius Hibbert"
# 3a). Rearrange every name into "first_name last_name" order.
# strsplit() (base R) splits each element on ", ": names without a comma stay
# in one piece, while "last, first" names become c("last", "first").
# vapply() then reverses the pieces of each name and glues them back together
# with a single space; unlike sapply() it always returns a character vector,
# even when the input is empty.
name2 <- vapply(
  strsplit(name, split = ", "),
  function(parts) paste(rev(parts), collapse = " "),
  character(1),
  USE.NAMES = FALSE
)
list(name2)
## [[1]]
## [1] "Moe Szyslak" "C. Montgomery Burns" "Rev. Timothy Lovejoy"
## [4] "Ned Flanders" "Homer Simpson" "Dr. Julius Hibbert"
# 3b). Check titles.
# str_detect() returns one logical per name: TRUE when the pattern is found.
# The dots are escaped so that they match a literal "." only; an unescaped "."
# matches any character, so a name such as "Drew" would wrongly be flagged.
# The result column is named explicitly instead of relying on the name that
# data.frame() derives from the text of the expression.
has_title <- data.frame(name2, has_title = str_detect(name2, "Rev\\.|Dr\\."))
list(has_title)
## [[1]]
## name2 has_title
## 1 Moe Szyslak FALSE
## 2 C. Montgomery Burns FALSE
## 3 Rev. Timothy Lovejoy TRUE
## 4 Ned Flanders FALSE
## 5 Homer Simpson FALSE
## 6 Dr. Julius Hibbert TRUE
#3c). Check Second Names
# Builds a data frame pairing each original name with a logical flag.
# The pattern " [A-Z]\\." looks for a space, one capital letter and a literal
# period, i.e. an abbreviated name such as " C." in "Burns, C. Montgomery".
# NOTE(review): a second name written out in full would not be detected by
# this pattern - confirm that initials are the only case that matters here.
second_name <- data.frame(name, str_detect(name," [A-Z]\\."))
list (second_name)
## [[1]]
## name str_detect.name.....A.Z......
## 1 Moe Szyslak FALSE
## 2 Burns, C. Montgomery TRUE
## 3 Rev. Timothy Lovejoy FALSE
## 4 Ned Flanders FALSE
## 5 Simpson, Homer FALSE
## 6 Dr. Julius Hibbert FALSE
# 4a). [0-9]+\\$ matches one or more digits immediately followed by a
# literal dollar sign (the backslashes stop "$" from meaning end-of-string).
Four_a <- "111$"
str_detect(string = Four_a, pattern = "[0-9]+\\$")
## [1] TRUE
# 4b). \\b[a-z]{1,4}\\b matches a run of one to four lowercase letters that
# has a word boundary on both sides, so "test1" and "testtt" are rejected.
Four_b <- c("t", "test", "test1", "testtt")
str_detect(string = Four_b, pattern = "\\b[a-z]{1,4}\\b")
## [1] TRUE TRUE FALSE FALSE
# 4c). .*?\\.txt$ matches any string that ends in the literal text ".txt";
# the lazy .*? prefix accepts whatever (possibly nothing) comes before it.
Four_c <- c("test.txt", "test", "t.text", "testtt.txt", "txttxt")
str_detect(string = Four_c, pattern = ".*?\\.txt$")
## [1] TRUE FALSE FALSE TRUE FALSE
# 4d). \\d{2}/\\d{2}/\\d{4} matches two digits, a forward slash, two digits,
# a forward slash and four digits, e.g. a date written as 11/11/1111.
Four_d <- c("11/11/1111", "1/11/1111", "11/1/1111", "11/11/111")
str_detect(string = Four_d, pattern = "\\d{2}/\\d{2}/\\d{4}")
## [1] TRUE FALSE FALSE FALSE
clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5 fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr
# Scrambled text that hides a message; the line breaks are part of the
# string literal itself.
string1 <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0Tanwo
Uwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigO
d6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5
fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Pull out every run of uppercase letters and/or periods, then flatten the
# list returned by str_extract_all() into a plain character vector.
string2 <- unlist(
  str_extract_all(string = string1, pattern = "[[:upper:].]{1,}")
)
string2
## [1] "C" "O" "N" "G" "R" "A" "T" "U" "L" "AT" "I" "O" "N" "S"
## [15] "." "Y" "O" "U" "." "A" "R" "E" "." "A" ".S" "U" "P" "E"
## [29] "R" "N" "E" "R" "D"
# Turn every period into a space within each extracted piece, then join the
# pieces with no separator to reveal the hidden sentence.
string3 <- paste(
  str_replace_all(string = string2, pattern = "[.]", replacement = " "),
  collapse = ""
)
string3
## [1] "CONGRATULATIONS YOU ARE A SUPERNERD"