library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
library(openintro)
## Warning: package 'openintro' was built under R version 4.0.3
## Warning: package 'airports' was built under R version 4.0.3
## Warning: package 'cherryblossom' was built under R version 4.0.3
## Warning: package 'usdata' was built under R version 4.0.3

Exercise 1

  1. Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either “DATA” or “STATISTICS”
library(dplyr)
library(stringr)

# Read the FiveThirtyEight college-majors list directly from its raw GitHub URL.
major <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
# Preview the first six rows of the loaded data frame.
head(x = major, n = 6L)
##   FOD1P                                 Major                  Major_Category
## 1  1100                   GENERAL AGRICULTURE Agriculture & Natural Resources
## 2  1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3  1102                AGRICULTURAL ECONOMICS Agriculture & Natural Resources
## 4  1103                       ANIMAL SCIENCES Agriculture & Natural Resources
## 5  1104                          FOOD SCIENCE Agriculture & Natural Resources
## 6  1105            PLANT SCIENCE AND AGRONOMY Agriculture & Natural Resources
# Keep only the rows whose Major name contains "DATA" or "STATISTICS".
Data_Stats_Majors <- major %>%
  dplyr::filter(grepl(pattern = "DATA|STATISTICS", x = Major))
# Show the matching majors.
head(Data_Stats_Majors)
##   FOD1P                                         Major          Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 2  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

Exercise 2

#2 Write code that transforms the data below:

[1] “bell pepper” “bilberry” “blackberry” “blood orange”

[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”

[9] “elderberry” “lime” “lychee” “mulberry”

[13] “olive” “salal berry”

Into a format like this:

c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)

library(tidyverse)

# Raw console print-out of the fruit vector, held as one string. Each printed
# row is written as its own element (trailing spaces included) and the rows
# are joined by a blank line, i.e. two newline characters.
fruits <- paste(
  c(
    '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"',
    '[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  ',
    '[9] "elderberry"   "lime"         "lychee"       "mulberry"    ',
    '[13] "olive"        "salal berry"'
  ),
  collapse = "\n\n"
)
fruits
## [1] "[1] \"bell pepper\"  \"bilberry\"     \"blackberry\"   \"blood orange\"\n\n[5] \"blueberry\"    \"cantaloupe\"   \"chili pepper\" \"cloudberry\"  \n\n[9] \"elderberry\"   \"lime\"         \"lychee\"       \"mulberry\"    \n\n[13] \"olive\"        \"salal berry\""
# Extract every fruit name from the raw print-out. A name is one or more
# letter-only words separated by single spaces. Letters use an explicit
# A-Za-z class and the word separator is a literal space, so quotes, digits
# and brackets can never be captured and single-letter names still match.
# str_extract_all() returns a list with one character vector per input string.
fruits2 <- str_extract_all(fruits, pattern = "[A-Za-z]+(?: [A-Za-z]+)*")
fruits2
## [[1]]
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"
# Build the c("...", "...") literal explicitly. fruits2 is a list holding one
# character vector, so take its first element, wrap every name in double
# quotes, join the names with ", " and add the surrounding c( ). Passing the
# list itself to str_c() only yields this text through implicit list coercion,
# which raises a "not an atomic vector" warning.
fruits3 <- str_c("c(", str_c('"', fruits2[[1]], '"', collapse = ", "), ")")
fruits3
## [1] "c(\"bell pepper\", \"bilberry\", \"blackberry\", \"blood orange\", \"blueberry\", \"cantaloupe\", \"chili pepper\", \"cloudberry\", \"elderberry\", \"lime\", \"lychee\", \"mulberry\", \"olive\", \"salal berry\")"
# writeLines() prints the string without escaping the embedded quotes.
writeLines(fruits3)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

Exercise 3

Describe, in words, what these expressions will match:

(.)\1\1 > Matches any single character repeated three times in a row, e.g. ‘aaa’.

“(.)(.)\\2\\1” > Matches a four-character palindrome: any two characters followed by the same two characters in reverse order, e.g. ‘abba’ or ‘-__-’.

(..)\1 > Matches any two characters that are immediately repeated, e.g. ‘pepe’.

“(.).\\1.\\1” > Matches five characters in which the first, third, and fifth are the same character, while the second and fourth can be any characters, e.g. ‘ititi’ or ‘abaca’.

"(.)(.)(.).*\\3\\2\\1" > Matches any three characters, followed by zero or more characters of any kind, followed by the same three characters in reverse order, e.g. ‘abccba’ or ‘abc123cba’.

Exercise 4

Construct regular expressions to match words that:

- Start and end with the same character.
- Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
- Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

# Start and end with the same character: both ends are anchored so the
# back-reference must be the final character of the word; the optional group
# lets a single-character word match as well.
str_view(c("noon", "dad"), "^(.)(.*\\1)?$")
# Contain a repeated pair of letters: capture two letters as one group and
# require that same pair to appear again later in the word.
str_view(c("church", "chaching", "manyana"), "([A-Za-z][A-Za-z]).*\\1")
# Contain one letter repeated in at least three places: the captured letter
# must occur two more times, with anything in between.
str_view(c("eleven", "banana"), "([A-Za-z]).*\\1.*\\1")
LS0tDQp0aXRsZTogIkRhdGEgNjA3LCBXZWVrIDMgYXNzaWdubWVudDIzIg0KYXV0aG9yOiAiSm9lIENvbm5vbGx5Ig0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQNCi0tLQ0KDQpgYGB7ciBsb2FkLXBhY2thZ2VzLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KG9wZW5pbnRybykNCmBgYA0KDQojIyMgRXhlcmNpc2UgMQ0KMS4gVXNpbmcgdGhlIDE3MyBtYWpvcnMgbGlzdGVkIGluIGZpdmV0aGlydHllaWdodC5jb23igJlzIENvbGxlZ2UgTWFqb3JzIGRhdGFzZXQgW2h0dHBzOi8vZml2ZXRoaXJ0eWVpZ2h0LmNvbS9mZWF0dXJlcy90aGUtZWNvbm9taWMtZ3VpZGUtdG8tcGlja2luZy1hLWNvbGxlZ2UtbWFqb3IvXSwgcHJvdmlkZSBjb2RlIHRoYXQgaWRlbnRpZmllcyB0aGUgbWFqb3JzIHRoYXQgY29udGFpbiBlaXRoZXIgIkRBVEEiIG9yICJTVEFUSVNUSUNTIg0KDQpgYGB7ciBjb2RlLWNodW5rLWxhYmVsfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoc3RyaW5ncikNCg0KbWFqb3IgPC0gcmVhZC5jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9maXZldGhpcnR5ZWlnaHQvZGF0YS9tYXN0ZXIvY29sbGVnZS1tYWpvcnMvbWFqb3JzLWxpc3QuY3N2IikNCmhlYWQobWFqb3IpDQoNCkRhdGFfU3RhdHNfTWFqb3JzIDwtIGRwbHlyOjpmaWx0ZXIobWFqb3IsIGdyZXBsKCdEQVRBfFNUQVRJU1RJQ1MnLCBNYWpvcikpDQpoZWFkKERhdGFfU3RhdHNfTWFqb3JzKQ0KDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDINCg0KIzIgV3JpdGUgY29kZSB0aGF0IHRyYW5zZm9ybXMgdGhlIGRhdGEgYmVsb3c6DQoNClsxXSAiYmVsbCBwZXBwZXIiICAiYmlsYmVycnkiICAgICAiYmxhY2tiZXJyeSIgICAiYmxvb2Qgb3JhbmdlIg0KDQpbNV0gImJsdWViZXJyeSIgICAgImNhbnRhbG91cGUiICAgImNoaWxpIHBlcHBlciIgImNsb3VkYmVycnkiICANCg0KWzldICJlbGRlcmJlcnJ5IiAgICJsaW1lIiAgICAgICAgICJseWNoZWUiICAgICAgICJtdWxiZXJyeSIgICAgDQoNClsxM10gIm9saXZlIiAgICAgICAgInNhbGFsIGJlcnJ5Ig0KDQpJbnRvIGEgZm9ybWF0IGxpa2UgdGhpczoNCg0KYygiYmVsbCBwZXBwZXIiLCAiYmlsYmVycnkiLCAiYmxhY2tiZXJyeSIsICJibG9vZCBvcmFuZ2UiLCAiYmx1ZWJlcnJ5IiwgImNhbnRhbG91cGUiLCAiY2hpbGkgcGVwcGVyIiwgImNsb3VkYmVycnkiLCAiZWxkZXJiZXJyeSIsICJsaW1lIiwgImx5Y2hlZSIsICJtdWxiZXJyeSIsICJvbGl2ZSIsICJzYWxhbCBiZXJyeSIpDQoNCmBgYCB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpmcnVpdHMgPC0gJ1sxXSAiYmVsbCBwZXBwZXIiICAiYmlsYmVycnkiICAgICAiYmxhY2tiZXJyeSIgICAiYmxvb2Qgb3JhbmdlIg0KDQpbNV0gImJsdWViZXJyeSIgICAgImNhbnRhbG91cGUiICAgImNoaWxpIHBlcHBlciIgImNsb3VkYmVycnkiICANCg0KWzldICJlbGRlcmJlcnJ5IiAg
ICJsaW1lIiAgICAgICAgICJseWNoZWUiICAgICAgICJtdWxiZXJyeSIgICAgDQoNClsxM10gIm9saXZlIiAgICAgICAgInNhbGFsIGJlcnJ5IicNCmZydWl0cw0KDQpmcnVpdHMyIDwtIHN0cl9leHRyYWN0X2FsbChmcnVpdHMsIHBhdHRlcm4gPSAnW1phLXpdKy4/W1phLXpdKycpDQpmcnVpdHMyDQoNCmZydWl0czMgPC0gc3RyX2MoZnJ1aXRzMiwgY29sbGFwc2UgPSAiLCAiKQ0KZnJ1aXRzMw0KDQp3cml0ZUxpbmVzKGZydWl0czMpDQpgYGANCg0KIyMjIEV4ZXJjaXNlIDMNCg0KRGVzY3JpYmUsIGluIHdvcmRzLCB3aGF0IHRoZXNlIGV4cHJlc3Npb25zIHdpbGwgbWF0Y2g6DQoNCiguKVwxXDENCj4gVGhpcyB3aWxsIG1hdGNoIGFueSBjb25zZWN1dGl2ZSBjaGFyYWN0ZXIgMyB0aW1lcyBpbiBhIHJvdy4NCg0KIiguKSguKVxcMlxcMSINCj4gTWF0Y2hlcyBhIHBhbGVuZHJvbWUtLXRoZSBmaXJzdCB0d28gY2hhcmFjdGVycyBhcmUgdGhlIHNhbWUgYXMgdGhlIGxhc3QgdHdvIGNoYXJhY3RlcnMgaW4gcmV2ZXJzZSBvcmRlcjogJy1fXy0nDQoNCiguLilcMQ0KPiBNYXRjaGVzIGEgZ3JvdXAgb2YgdHdvIGNoYXJhY3RlcnMgdGhhdCByZXBlYXQgdGhlbXNlbHZlcyBmb2xsb3dpbmcgaW1tZWRpYXRlbHk6ICdwZXBlJw0KDQoiKC4pLlxcMS5cXDEiDQo+IE1hY3RoZXMgdGhlIGluaXRpYWwgY2hhcmFjdGVyLCBmb2xsb3dlZCBieSB0aGUgc2Vjb25kIGNoYXJhY3RlciBBTkQgdGhlIGZpcnN0IGNoYXJhY3RlciBBUyB0aGUgdGhpcmQgY2hhcmFjdGVyLCB0aGF0IGJvdGggcmVwZWF0IHR3aWNlOiAnaXRpdGknDQoNCiIoLikoLikoLikuKlxcM1xcMlxcMSINCj4gTWF0Y2hlcyB0aGUgZmlyc3QgdGhyZWUgY2hhcmFjdGVycyBmb2xsb3dlZCBieSB0aGUgc2FtZSBjaGFyYWN0ZXJzIGluIHJldmVyc2Ugb3JkZXINCg0KDQojIyMgRXhlcmNpc2UgNA0KDQpDb25zdHJ1Y3QgcmVndWxhciBleHByZXNzaW9ucyB0byBtYXRjaCB3b3JkcyB0aGF0Og0KDQotU3RhcnQgYW5kIGVuZCB3aXRoIHRoZSBzYW1lIGNoYXJhY3Rlci4NCi1Db250YWluIGEgcmVwZWF0ZWQgcGFpciBvZiBsZXR0ZXJzIChlLmcuICJjaHVyY2giIGNvbnRhaW5zICJjaCIgcmVwZWF0ZWQgdHdpY2UuKQ0KLUNvbnRhaW4gb25lIGxldHRlciByZXBlYXRlZCBpbiBhdCBsZWFzdCB0aHJlZSBwbGFjZXMgKGUuZy4gImVsZXZlbiIgY29udGFpbnMgdGhyZWUgImUicy4pDQoNCg0KYGBge3J9DQpzdHJfdmlldyhjKCJub29uIiwgImRhZCIpLCAiKC4pKC4pLipcXDEiKQ0KYGBgDQoNCmBgYHtyfQ0Kc3RyX3ZpZXcoYygiY2h1cmNoIiwgImNoYWNoaW5nIiwgIm1hbnlhbmEiKSwgIiguKSguKSpcXDEiKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=