library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ dplyr 1.0.10
## ✔ tibble 3.1.8 ✔ stringr 1.5.0
## ✔ tidyr 1.3.0 ✔ forcats 0.5.2
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Raw CSV of FiveThirtyEight's college-majors list, hosted on GitHub.
majors_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
# Download and parse the file; readr guesses the column types and reports them.
majors <- read_csv(majors_url)
## Rows: 174 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): FOD1P, Major, Major_Category
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the majors whose name contains "DATA" or "STATISTICS".
majors %>%
  pull(Major) %>%
  str_subset(pattern = "(DATA|STATISTICS)")
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
library(stringr)
# Console printout of a character vector of fruit names, stored as ONE string:
# "[n]" index markers, double-quoted items, and embedded line breaks are all
# part of the text. Single quotes delimit the literal so the double quotes
# around each item need no escaping.
fruits <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
# Extract the items from the printed form of a character vector.
#
# `string` is a character vector holding console output such as
# '[1] "bell pepper" "lime"'. An item is a run of lowercase words separated
# by whitespace, so index markers ("[1]"), quotes and digits are skipped.
#
# Returns a list with one character vector of items per element of `string`
# (the shape produced by str_extract_all()); use `[[1]]` for a single input.
words_vector <- function(string) {
  # One word, optionally followed by further whitespace-separated words.
  # Requiring "[a-z]+\\s*[a-z]+" instead would drop one-letter items and
  # split items made of three or more words.
  str_extract_all(string, "[a-z]+(?:\\s+[a-z]+)*")
}
# Turn the printed fruit listing back into its individual names.
fruits %>%
  words_vector()
## [[1]]
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# Note: the patterns below assume that words contain only letters (a-z, A-Z).
# Sample words used to exercise the backreference patterns that follow.
words <- c(
  "ono", "jelly", "pop", "lol", "theeth", "jhoojh",
  "qafqa", "eee", "elelel", "olpojo", "jokollo", "chchlop"
)
# 4.1: Start and end with the same character.
# The first letter is captured and must reappear as the last one. The tail
# group is optional so that a one-letter word, whose first and last character
# are the same letter, also matches.
same_first_last <- str_subset(words, "^([a-zA-Z])(?:[a-zA-Z]*\\1)?$")
same_first_last
## [1] "ono" "pop" "lol" "eee" "olpojo"
# 4.2: Contain a repeated pair of letters (e.g. "church" contains "ch" twice).
# Two adjacent letters are captured; the same pair must occur again later.
repeated_pair <- words %>%
  str_subset("([a-zA-Z][a-zA-Z])[a-zA-Z]*\\1")
repeated_pair
## [1] "theeth" "jhoojh" "qafqa" "elelel" "chchlop"
# 4.3: Contain one letter repeated in at least three places
# (e.g. "eleven" contains three "e"s).
# One letter is captured and must be found two more times further on.
same_letter_3x <- words %>%
  str_subset("([a-zA-Z])[a-zA-Z]*\\1[a-zA-Z]*\\1")
same_letter_3x
## [1] "eee" "elelel" "olpojo" "jokollo"