library(stringr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v forcats 0.5.1
## v readr   1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)

Question #1

Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset (https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/), provide code that identifies the majors whose names contain either “DATA” or “STATISTICS”.

# Download the majors list published in the fivethirtyeight/data GitHub repo.
majors_csv <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
majors_dataframe <- data.frame(majors_csv)

# Keep only the rows whose Major text contains "DATA" or "STATISTICS".
# grepl() returns a plain TRUE/FALSE vector, so it can index the rows directly.
data_statistics_majors <- majors_dataframe[
  with(majors_dataframe, grepl("DATA", Major) | grepl("STATISTICS", Major)), ]

# Print the matching majors.
data_statistics_majors
##    FOD1P                                         Major          Major_Category
## 44  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 52  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 59  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

Question #2

# Raw console printout of a character vector: bracketed index markers followed
# by fruit names, each wrapped in double quotes.
fruits <- '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"

[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  

[9] "elderberry"   "lime"         "lychee"       "mulberry"    

[13] "olive"        "salal berry"'
# Extract every name: one or more runs of letters joined by single spaces.
# The separator is an explicit space rather than the wildcard `.`, so only
# multi-word names such as "bell pepper" are joined; index markers like [1]
# contain no letters and are skipped.
fruits_pattern <- str_extract_all(fruits, pattern = "[A-Za-z]+(?: [A-Za-z]+)*")
# str_extract_all() returns a list with one element per input string. Work on
# its first element (a character vector) instead of passing the list itself to
# str_c(), which only worked through implicit coercion and raised a warning.
# Each name is re-quoted, the names are joined with ", ", and the result is
# wrapped in c( ... ) to produce the text of an R vector literal.
fruits_vector <- str_c(
  "c(",
  str_c('"', fruits_pattern[[1]], '"', collapse = ", "),
  ")"
)
writeLines(fruits_vector)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

Question #3

# "(.)\\1\\1": any single character repeated three times in a row.
txt <- c("vrooooom", "daaaamn", "zoom")
str_view(string = txt, pattern = "(.)\\1\\1")

# "(.)(.)\\2\\1": two characters followed by the same two in reverse order.
txt2 <- c("woowing", "daamn", "zoom", "lool")
str_view(string = txt2, pattern = "(.)(.)\\2\\1")

# "(..)\\1": any pair of characters immediately repeated.
txt3 <- c("halaland", "lalaland", "paparazzi", "todo", "mama")
str_view(string = txt3, pattern = "(..)\\1")

# "(.).\\1.\\1": a character, any character, the first character again,
# any character, and the first character a third time.
txt4 <- c("alaba", "evenen", "event", "avatar")
str_view(string = txt4, pattern = "(.).\\1.\\1")

# "(.)(.)(.).*\\3\\2\\1": three characters, then zero or more of any
# characters, then the same three characters in reverse order.
txt5 <- c("badisdab", "dabisnotbad", "reyisrey", "moonisnoom", "docanbeod")
str_view(string = txt5, pattern = "(.)(.)(.).*\\3\\2\\1")

Question #4

# Strings that start and end with the same character.
# NOTE(review): assumes the exercise asks for "starts and ends with the same
# character", as the bob / cloth / beeb examples suggest -- confirm.
# ^ and $ pin the captured character to the first position and the
# back-reference to the last one; without them the pattern matches any string
# that merely contains a repeated character somewhere (e.g. "abcb"). The
# optional group also lets a one-character string match.
str_view(c("bob", "cloth", "beeb"), "^(.)(.*\\1)?$")

# Strings containing a pair of characters that occurs again later
# (e.g. "ch" in "church").
str_view(c("church", "blooggingg"), "(..).*\\1")

# Strings containing the same character in at least three places
# (e.g. the three "e"s in "eleven"; "even" has only two).
str_view(c("eleven", "even"), "(.).*\\1.*\\1")