packages used
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Download the FiveThirtyEight college-majors list as a data frame.
majors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  header = TRUE,
  sep = ","
)
# Keep the entries of `arr` that mention "data" or "statistics".
#
# Matching is case-insensitive: each entry is lower-cased before it is
# compared against the pattern.
#
# Args:
#   arr: vector of major names; it is converted with as.character(), so a
#        factor column works as well as a character vector.
#
# Returns:
#   A character vector holding the matching entries in their original order.
#   When nothing matches (or `arr` is empty) the result is character(0), so
#   callers always receive the same type.
data_or_stat <- function(arr) {
  majors_chr <- as.character(arr)
  # grepl() is vectorised and yields FALSE for NA entries, so one call
  # replaces the element-by-element loop and no NA guard is required.
  is_match <- grepl("data|statistics", tolower(majors_chr))
  majors_chr[is_match]
}
# Take the Major column and print the entries kept by data_or_stat().
major_list <- majors$Major
data_or_stat(arr = major_list)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
# Shorter built-in alternative: grep() with value = TRUE returns the matching
# entries themselves, and ignore.case = TRUE makes the match case-insensitive.
grep(
  pattern = "data|statistics",
  x = major_list,
  ignore.case = TRUE,
  value = TRUE
)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
# Produce the R source text of a character vector, i.e. "c('a', 'b', ...)".
fruits <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)
# Quote every element in one vectorised step, join them with ", ", and wrap
# the result in c( ... ). Building the text this way does not special-case the
# first element (which broke when a later element equalled the first) and
# paste0() adds no separator, so no stray space appears before the closing ")".
string <- paste0("c(", paste(sprintf("'%s'", fruits), collapse = ", "), ")")
string
## [1] "c('bell pepper', 'bilberry', 'blackberry', 'blood orange', 'blueberry', 'cantaloupe', 'chili pepper', 'cloudberry', 'elderberry', 'lime', 'lychee', 'mulberry', 'olive', 'salal berry')"
Note: inside an R string literal the backslash must itself be escaped, so the first capture group is referenced as \\1, whereas plain regex syntax writes it as \1
matches a string that contains any single character (other than a line break) repeated three times in a row (ex: aaa)
# (.) captures one character; each \\1 requires that same character again,
# so the match needs three identical characters side by side.
strings <- c("abc", "aaa", "bbb", "xyz")
pattern <- "(.)\\1\\1"
str_detect(string = strings, pattern = pattern)
## [1] FALSE TRUE TRUE FALSE
matches a string that contains any two characters followed by those same two characters in reverse order (ex: ahha)
# Two captured characters followed by group 2 and then group 1, i.e. the pair
# and its mirror image.
strings <- c("abcd", "aaaa", "ahha", "haha")
pattern <- "(.)(.)\\2\\1"
str_detect(string = strings, pattern = pattern)
## [1] FALSE TRUE TRUE FALSE
matches a string that contains a pair of characters immediately followed by the same pair (ex: ezez)
# (..) captures a two-character chunk and \\1 requires the same chunk again
# directly afterwards.
strings <- c("ezez", "aaaa", "haha", "aboab")
pattern <- "(..)\\1"
str_detect(string = strings, pattern = pattern)
## [1] TRUE TRUE TRUE FALSE
matches a string in which the same character appears three times, with exactly one arbitrary character between consecutive occurrences (ex: abacad)
# The captured character must recur at every second position: captured
# character, any character, same character, any character, same character.
strings <- c("abacad", "aabbcc", "three", "aaa")
pattern <- "(.).\\1.\\1"
str_detect(string = strings, pattern = pattern)
## [1] TRUE FALSE FALSE FALSE
matches a string that contains any three characters followed, after zero or more arbitrary characters, by the same three characters in reverse order (ex: abcdcba)
# Three captured characters, then .* (any run of characters, possibly empty),
# then the captures replayed in the order 3, 2, 1.
strings <- c("cccccc", "abcdcba", "abcdabc", "ahaha")
pattern <- "(.)(.)(.).*\\3\\2\\1"
str_detect(string = strings, pattern = pattern)
## [1] TRUE TRUE FALSE FALSE
^([a-z])[a-z]*\1$
# Anchored at both ends: the first lowercase letter is captured and the same
# letter must also be the last character of the string.
names <- c("richard", "hannah", "bob", "annie")
pattern <- "^([a-z])[a-z]*\\1$"
str_detect(string = names, pattern = pattern)
## [1] FALSE TRUE TRUE FALSE
[a-z]*([a-z]{2})[a-z]*\1[a-z]*
# A captured pair of lowercase letters that shows up a second time later in
# the string, with any number of lowercase letters around and between them.
words <- c("church", "dog", "halal", "cat")
pattern <- "[a-z]*([a-z]{2})[a-z]*\\1[a-z]*"
str_detect(string = words, pattern = pattern)
## [1] TRUE FALSE TRUE FALSE
[a-z]*([a-z])[a-z]*\1[a-z]*\1[a-z]*
# One captured lowercase letter that occurs two more times later on, with any
# number of lowercase letters allowed between the occurrences.
numbers <- c("eleven", "seven", "three", "seventeen")
pattern <- "[a-z]*([a-z])[a-z]*\\1[a-z]*\\1[a-z]*"
str_detect(string = numbers, pattern = pattern)
## [1] TRUE FALSE FALSE TRUE