library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#1
# Read the FiveThirtyEight college-majors list directly from GitHub.
majors_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
majors_list <- read.csv(majors_url)
# Alternation: a major qualifies if its name contains DATA or STATISTICS.
pattern <- "DATA|STATISTICS"
# One TRUE/FALSE per major; letter case is ignored when matching.
matching_rows <- grepl(pattern, majors_list[["Major"]], ignore.case = TRUE)
# Keep the matching rows with all columns; original row names are preserved.
majors <- majors_list[which(matching_rows), , drop = FALSE]
print(majors)
## FOD1P Major Major_Category
## 44 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
## 52 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 59 3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics
#2
# Raw console printout of a character vector, held as a single string.
# The [n] index markers and the line breaks are part of the text.
strIn <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
# Locate every double-quoted token. Index markers and the whitespace between
# tokens are never enclosed in quotes, so they are skipped.
matches <- gregexpr("\"[^\"]+\"", strIn)
# Extract the matched substrings; each one still carries its quotes.
extracted_strings <- regmatches(strIn, matches)[[1]]
# Join the quoted tokens into one comma-separated string for display.
strOut <- paste(extracted_strings, collapse = ", ")
print(strOut)
## [1] "\"bell pepper\", \"bilberry\", \"blackberry\", \"blood orange\", \"blueberry\", \"cantaloupe\", \"chili pepper\", \"cloudberry\", \"elderberry\", \"lime\", \"lychee\", \"mulberry\", \"olive\", \"salal berry\""
# Strip the quotes from each token directly. Working on the already-separated
# tokens means no split-on-comma / trim pass is needed, so a value containing
# a comma cannot be cut apart.
final <- gsub('"', "", extracted_strings, fixed = TRUE)
print(final)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# Expected answer.
y <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)
# identical() yields a single TRUE/FALSE and also compares length and type,
# whereas `==` recycles and returns one logical per element.
identical(y, final)
## [1] TRUE
#3
# (.)\1\1 = any single character repeated three times in a row, such as aaa
# "(.)(.)\\2\\1" = two characters followed by the same two characters in reverse order, such as 1221
# (..)\1 = any two characters repeated immediately, such as 1212
# "(.).\\1.\\1" = a character, then any character, the original character, any character, and then the original character again, such as 12131
# "(.)(.)(.).*\\3\\2\\1" = three characters, then zero or more of any characters, and then the same three characters in reverse order,
# such as 1234321
#4
# Words that start and end with the same character. The second alternative
# (\\1?$) covers one- and two-character words such as "a".
words %>% str_subset("^(.)((.*\\1$)|\\1?$)")
## [1] "a" "america" "area" "dad" "dead"
## [6] "depend" "educate" "else" "encourage" "engine"
## [11] "europe" "evidence" "example" "excuse" "exercise"
## [16] "expense" "experience" "eye" "health" "high"
## [21] "knock" "level" "local" "nation" "non"
## [26] "rather" "refer" "remember" "serious" "stairs"
## [31] "test" "tonight" "transport" "treat" "trust"
## [36] "window" "yesterday"
# Words in which some pair of adjacent letters appears again later in the
# word, e.g. "ch" in "church".
words %>% str_subset("([A-Za-z][A-Za-z]).*\\1")
## [1] "appropriate" "church" "condition" "decide" "environment"
## [6] "london" "paragraph" "particular" "photograph" "prepare"
## [11] "pressure" "remember" "represent" "require" "sense"
## [16] "therefore" "understand" "whether"
# Words in which one lowercase letter occurs at least three times; the
# occurrences need not be adjacent, e.g. "e" in "eleven".
words %>% str_subset("([a-z]).*\\1.*\\1")
## [1] "appropriate" "available" "believe" "between" "business"
## [6] "degree" "difference" "discuss" "eleven" "environment"
## [11] "evidence" "exercise" "expense" "experience" "individual"
## [16] "paragraph" "receive" "remember" "represent" "telephone"
## [21] "therefore" "tomorrow"