library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
Using the 173 majors listed in fivethirtyeight.com's College Majors dataset [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either "DATA" or "STATISTICS".
# Download the FiveThirtyEight majors list and print every major whose
# name contains "DATA" or "STATISTICS".
collegeMajor <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  header = TRUE
)
str_subset(collegeMajor$Major, pattern = "DATA|STATISTICS")
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
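Write code that transforms the data below:
[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"
into a format like this: c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")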
# Raw console printout of a character vector, held as one string. It still
# contains the "[n]" index markers R prints at the start of each output line.
vec.text <- c('[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"')
# Extract every double-quoted token directly. The "[n]" markers and the
# whitespace between items are never quoted, so they are skipped without a
# separate clean-up step, and items are kept whatever characters they contain
# (a filter on lowercase letters would drop an item such as "KIWI").
quoted_tokens <- unlist(regmatches(vec.text, gregexpr('"[^"]*"', vec.text)))
# Strip the surrounding quote characters to leave the bare item names.
vec.text.char <- gsub('"', "", quoted_tokens, fixed = TRUE)
vec.text.char
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# "(.)\\1\\1": one character immediately followed by two more copies of
# itself, i.e. three identical characters in a row.
x1 <- "zzz"
str_detect(string = x1, pattern = "(.)\\1\\1")
## [1] TRUE
# "(.)(.)\\2\\1": two characters followed by the same two characters in
# reverse order (an "abba"-shaped run).
x1 <- "peep"
str_detect(string = x1, pattern = "(.)(.)\\2\\1")
## [1] TRUE
Two characters repeated once:
# "(..)\\1": a pair of characters immediately followed by the same pair.
x1 <- "soso"
str_detect(string = x1, pattern = "(..)\\1")
## [1] TRUE
# "(.).\\1.\\1": a character, any character, the first character again,
# any character, then the first character a third time.
x1 <- "gpgeg"
str_detect(string = x1, pattern = "(.).\\1.\\1")
## [1] TRUE
# "(.)(.)(.).*\\3\\2\\1": three characters, then any run of characters
# (possibly empty), then the same three characters in reverse order.
x1 <- "moppom"
str_detect(string = x1, pattern = "(.)(.)(.).*\\3\\2\\1")
## [1] TRUE
# Show where each back-reference pattern matches inside a sample word.
# `match = TRUE` limits the display to strings that match the pattern; it is
# spelled out in full because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.

# Starts and ends with the same character.
str_view("pump", "^(.).*\\1$", match = TRUE)
# Contains a pair of characters that occurs again later in the string.
str_view("salsa", "(..).*\\1", match = TRUE)
# Contains one character that appears in at least three places.
str_view("settlements", "(.).*\\1.*\\1", match = TRUE)