There are 3 majors that contain either “DATA” or “STATISTICS”: “MANAGEMENT INFORMATION SYSTEMS AND STATISTICS”, “COMPUTER PROGRAMMING AND DATA PROCESSING”, and “STATISTICS AND DECISION SCIENCE”.
# Load the FiveThirtyEight college-majors list and print every major whose
# name mentions "data" or "statistics", ignoring case.
df <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
print(
  grep(
    pattern = "data|statistics",
    x = df[["Major"]],
    ignore.case = TRUE,
    value = TRUE
  )
)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Into a format like this:
c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”,
“chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”,
“mulberry”, “olive”, “salal berry”)
# My attempt before I saw the message on slack. Converted it to a factor vector
# to resemble what the first format looked like and then converted it to a
# character vector.
fruits <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee",
  "mulberry", "olive", "salal berry"
)
fruits_factor <- factor(fruits)

#' Render a vector as the source text of a `c(...)` call.
#'
#' @param x A factor or character vector; it is passed through
#'   `as.character()` before formatting.
#' @return A single string such as `c("a", "b")`; `c()` when `x` is empty.
factor_to_char <- function(x) {
  # sprintf() inserts nothing around the value, so the quotes hug the text
  # (paste() with its default sep = " " produced `" bell pepper "`). It also
  # returns character(0) for empty input, which collapses to "" below.
  quoted <- sprintf("\"%s\"", as.character(x))
  paste0("c(", paste(quoted, collapse = ", "), ")")
}
fruits_char <- factor_to_char(fruits_factor)
cat(fruits_char)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

# My attempt after I saw the message on slack. Both have the same outcome.
strStart <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'

#' Turn printed character-vector output back into `c(...)` source text.
#'
#' Strips the `[n]` index markers, takes the text between double quotes as the
#' values, and prints them with `cat()` as one `c("...", "...")` expression.
#'
#' @param strStart A single string holding the console printout of a
#'   character vector, e.g. `[1] "a" "b"`.
#' @return The extracted values as a character vector, invisibly.
convert_string_to_vector <- function(strStart) {
  # Drop the "[1]", "[5]", ... row-index markers.
  without_index <- gsub("\\[\\d+\\]", "", strStart)
  # Splitting on the double quote alternates values with the whitespace
  # between them; trimming turns the separators into "" so they can be dropped.
  pieces <- trimws(unlist(strsplit(without_index, "\"")))
  fruits_raw <- pieces[pieces != ""]
  # Quote each value without padding spaces, then join with ", ".
  fruits <- paste(sprintf("\"%s\"", fruits_raw), collapse = ", ")
  cat(paste0("c(", fruits, ")"))
  invisible(fruits_raw)
}
convert_string_to_vector(strStart)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
The two exercises below are taken from R for Data Science, 14.3.5.1 in the on-line version:
# Test words for the back-reference exercises below. Each str_subset() call
# keeps the elements of exer_3 that match the given pattern; the `##` line
# after each call is its recorded output.
# NOTE(review): str_subset() is presumably stringr's -- the library() call is
# not visible in this section; confirm it is loaded earlier in the file.
exer_3 <- c('banana', 'daaa', 'bbb', 'abcd', 'abba', 'sbbsb', '444','32323', '1234321', 'abcddcba', 'church','eleven')
# With a single backslash, R reads "\1" as an octal escape (the control
# character with code 1), not as a regex back-reference, so this looks for a
# character followed by two \001 characters and matches nothing here.
str_subset(exer_3, "(.)\1\1")
## character(0)
# Properly escaped: the same character three times in a row.
str_subset(exer_3, "(.)\\1\\1")
## [1] "daaa" "bbb" "444"
# Two characters followed by the same two in reverse order (e.g. "abba").
str_subset(exer_3, "(.)(.)\\2\\1")
## [1] "abba" "sbbsb" "abcddcba"
# A pair of characters immediately repeated (e.g. "anan", "3232").
str_subset(exer_3, "(..)\\1")
## [1] "banana" "32323"
# A character, any character, the first character again, any character, and
# the first character once more (e.g. "anana", "eleve").
str_subset(exer_3, "(.).\\1.\\1")
## [1] "banana" "32323" "eleven"
# Three characters, then anything, then the same three in reverse order.
str_subset(exer_3,"(.)(.)(.).*\\3\\2\\1")
## [1] "1234321" "abcddcba"
# Starts and ends with the same character (needs at least two characters).
str_subset(exer_3,"^(.).*\\1$")
## [1] "bbb" "abba" "444" "32323" "1234321" "abcddcba"
# Contains a pair of letters that occurs again later in the string.
str_subset(exer_3,".*([A-Za-z][A-Za-z]).*\\1.*")
## [1] "banana" "sbbsb" "church"
# Contains one letter that occurs in at least three places.
str_subset(exer_3,".*([A-Za-z]).*\\1.*\\1.*")
## [1] "banana" "daaa" "bbb" "sbbsb" "eleven"