library(readr)
library(tidyverse)
library(stringr)
Question 1: Provide code that identifies the majors that contain either “DATA” or “STATISTICS”.
# Read the FiveThirtyEight "all-ages" college-majors table, then show every
# row whose Major contains "DATA" or "STATISTICS".
majors_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/all-ages.csv"
college_majors_df <- read.csv(majors_url)
has_data <- str_detect(college_majors_df$Major, "DATA")
has_statistics <- str_detect(college_majors_df$Major, "STATISTICS")
college_majors_df[has_data | has_statistics, ]
## Major_code Major
## 20 2101 COMPUTER PROGRAMMING AND DATA PROCESSING
## 93 3702 STATISTICS AND DECISION SCIENCE
## 170 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS
## Major_category Total Employed Employed_full_time_year_round
## 20 Computers & Mathematics 29317 22828 18747
## 93 Computers & Mathematics 24806 18808 14468
## 170 Business 156673 134478 118249
## Unemployed Unemployment_rate Median P25th P75th
## 20 2265 0.09026422 60000 40000 85000
## 93 1138 0.05705405 70000 43000 102000
## 170 6186 0.04397714 72000 50000 100000
Question 2 code
# Console printout of the fruit vector, stored verbatim as one multi-line
# string (the [n] index markers and the embedded double quotes are part of it).
q2_original <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
# Display the string as it reads on screen, one printed line per text line.
str_view(string = q2_original)
## [1] │ [1] "bell pepper" "bilberry" "blackberry" "blood orange"
## │ [5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
## │ [9] "elderberry" "lime" "lychee" "mulberry"
## │ [13] "olive" "salal berry"
# print() shows the same string in escaped form: the embedded double quotes
# appear as \" and the line breaks as \n.
print(q2_original)
## [1] "[1] \"bell pepper\" \"bilberry\" \"blackberry\" \"blood orange\"\n[5] \"blueberry\" \"cantaloupe\" \"chili pepper\" \"cloudberry\" \n[9] \"elderberry\" \"lime\" \"lychee\" \"mulberry\" \n[13] \"olive\" \"salal berry\""
# Recover the fruit names as a character vector. Each name is taken as a
# whole quoted token (everything from one double quote to the next), so
# two-word names such as "bell pepper" stay together; a bare "[a-z]+" would
# split them at the space and yield 18 fragments instead of 14 names. The
# surrounding quotes are stripped afterwards.
q2_final <- str_remove_all(
  unlist(str_extract_all(q2_original, pattern = '"[^"]+"')),
  pattern = '"'
)
print(q2_final)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
Question 3
(.)\1\1
This expression matches any single character (.) followed by that same character two more times (\1\1), i.e. the same character three times in a row, such as “aaa”.
“(.)(.)\2\1”
This expression has two capture groups and matches a pair of characters followed by the same pair in reverse order, such as “abba”.
(..)\1
This expression matches any two characters followed immediately by the same two characters, such as “abab”.
“(.).\1.\1”
This expression matches a captured character, then any character, then the captured character again (\1), then any character, and finally the captured character once more, such as “abaca”.
“(.)(.)(.).*\3\2\1”
This expression matches three captured characters (.)(.)(.), followed by zero or more of any character (.*), followed by the same three characters in reverse order (\3\2\1), such as “abcxyzcba”.
Question 4
Start and end with the same character.
Part 1
# Test words for the Question 4 patterns.
# NOTE(review): the name `list` masks base::list() as a variable; it is kept
# because the str_view() calls that follow refer to it.
list <- c(
  "print", "hello", "world", "mom",
  "yippy", "dead", "harsh", "essence",
  "median", "mode", "sandwich", "standarddeviation",
  "eigenvalue", "pcaanalysis", "church", "eleven"
)
Six words should be printed once this expression has run: mom, yippy, dead, harsh, essence and eigenvalue.
# Words that start and end with the same character. The first character is
# captured and must recur immediately before the end of the string. The
# "anything, then the captured character" tail is optional so that a
# one-character string, which trivially starts and ends with the same
# character, also matches.
str_view(list, "^(.)(.*\\1)?$")
## [4] │ <mom>
## [5] │ <yippy>
## [6] │ <dead>
## [7] │ <harsh>
## [8] │ <essence>
## [13] │ <eigenvalue>
Part 2 Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
# Words containing a pair of letters that appears again later in the word:
# capture two consecutive letters, allow anything in between, then require
# the captured pair again.
str_view(string = list, pattern = "([A-Za-z][A-Za-z]).*\\1")
## [15] │ <church>
Part 3 Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.) I added a few more words to the list to check that the pattern also works when a letter appears more than three times or when several different letters repeat, and it did.
# Words in which one letter occurs at least three times: capture a letter,
# then require it twice more with anything allowed in between.
str_view(string = list, pattern = "([A-Za-z]).*\\1.*\\1")
## [8] │ <essence>
## [12] │ st<andarddevia>tion
## [13] │ <eigenvalue>
## [14] │ pc<aana>lysis
## [16] │ <eleve>n