library(tidyverse)
library(openintro)
library(lubridate)

##Loading data from website

# URL of the raw majors-list CSV in the fivethirtyeight/data GitHub repository.
raw_file <-
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"

Using the 173 majors listed in fivethirtyeight.com's College Majors dataset (https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/), provide code that identifies the majors that contain either “DATA” or “STATISTICS”.

## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 3 × 2
##   Major                                         matched_name[,1]                
##   <chr>                                         <chr>                           
## 1 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS MANAGEMENT INFORMATION SYSTEMS …
## 2 COMPUTER PROGRAMMING AND DATA PROCESSING      COMPUTER PROGRAMMING AND DATA P…
## 3 STATISTICS AND DECISION SCIENCE               STATISTICS AND DECISION SCIENCE

##Write code that transforms the data below: #create the vector of the DATA

# Build the character vector of produce names used in this exercise.
# Every name is a complete string literal on one line: a line break placed
# inside the quotes becomes part of the value (it previously produced
# "\n            cantaloupe" as the sixth element).
fruits <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)
fruits
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"
# Produce names to be rendered as R source text.
fruits <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)

# Wrap every name in double quotes. paste0() has no `sep` argument, so the
# quote characters are passed as ordinary positional strings.
quoted_fruits <- paste0('"', fruits, '"')

# Join the quoted names with ", " and surround them with c( ... ), giving the
# text of the R call that would recreate the vector.
fruits_code <- paste0("c(", paste0(quoted_fruits, collapse = ", "), ")")

# Print the generated code as-is (no [1] index, no escaped quotes).
cat(fruits_code)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

#3 Describe, in words, what these expressions will match:

(.)\1\1 “(.)(.)\2\1” (..)\1 “(.).\1.\1” “(.)(.)(.).*\3\2\1”

##Answer:

(.)\1\1 - Any single character repeated three times in a row (e.g. “aaa”). “(.)(.)\2\1” - Any two characters followed by the same two characters in reverse order (e.g. “abba”). (..)\1 - Any two characters that repeat in the same order (e.g. “abab”). “(.).\1.\1” - Any single character that repeats two more times, with each repetition coming after one arbitrary character (e.g. “abaca”). “(.)(.)(.).*\3\2\1” - Any three characters, followed by any number of arbitrary characters, followed by the same three characters in reverse order (e.g. “abcxyzcba”).

#4 Construct regular expressions to match words that: Start and end with the same character. Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.) Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

##A_regex_expr1_Start and end with the same character

# Sample words shared by the regex exercises.
df.names <- c(
  "anna", "church", "bob", "harry", "paul", "eleven", "bubble"
)

# Capture the first character, then either finish on that same character
# (with anything in between) or end immediately, so a one-character word
# also counts as starting and ending with the same character.
regex_expr1 <- "^(.)((.*\\1$)|\\1?$)"

# Keep only the words that match the pattern.
str_subset(string = df.names, pattern = regex_expr1)
## [1] "anna" "bob"

##B_regex_expr2_Contain a repeated pair of letters

# Capture two consecutive ASCII letters, then require the same pair to
# appear again anywhere later in the word.
regex_expr2 <- "([A-Za-z][A-Za-z]).*\\1"

# Keep only the words that match the pattern.
str_subset(string = df.names, pattern = regex_expr2)
## [1] "church"

##C_regex_expr3_Contain one letter repeated in at least three places

# Capture one ASCII letter, then require it to appear at least two more
# times, with any characters (or none) between the occurrences.
regex_expr3 <- "([A-Za-z]).*\\1.*\\1"

# Keep only the words that match the pattern.
str_subset(string = df.names, pattern = regex_expr3)
## [1] "eleven" "bubble"
LS0tDQp0aXRsZTogIkRhdGEgNjA3IEFzc2lnbm1lbnQgMDMiDQphdXRob3I6ICJNZC4gVGFuemlsIEVoc2FuIg0KZGF0ZTogIjAyLzE2LzIwMjUiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KZWRpdG9yX29wdGlvbnM6IA0KICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lDQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmBgYA0KDQojI0xvYWRpbmcgZGF0YSBmcm9tIHdlYnNpdGUNCg0KDQpgYGB7ciB9DQpyYXdfZmlsZSA9IA0KICAnaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2ZpdmV0aGlydHllaWdodC9kYXRhL21hc3Rlci9jb2xsZWdlLW1ham9ycy9tYWpvcnMtbGlzdC5jc3YnDQpgYGANCiMjIFVzaW5nIHRoZSAxNzMgbWFqb3JzIGxpc3RlZCBpbiBmaXZldGhpcnR5ZWlnaHQuY29t4oCZcyBDb2xsZWdlIE1ham9ycyBkYXRhc2V0ICMjW2h0dHBzOi8vZml2ZXRoaXJ0eWVpZ2h0LmNvbS9mZWF0dXJlcy90aGUtZWNvbm9taWMtZ3VpZGUtdG8tcGlja2luZy1hLWNvbGxlZ2UtbWFqb3IvXSwgcHJvdmlkZSBjb2RlIHRoYXQgaWRlbnRpZmllcyB0aGUgbWFqb3JzIHRoYXQgY29udGFpbiBlaXRoZXIg4oCcREFUQeKAnSBvciDigJxTVEFUSVNUSUNT4oCdDQoNCmBgYHtyIHByZXNzdXJlLCBlY2hvPUZBTFNFfQ0KbWFqb3JzIDwtYXNfdGliYmxlKHJlYWQuY3N2KHJhd19maWxlKSkNCm1ham9yc19zdWIgPC0gbWFqb3JzICU+JSANCiAgICBtdXRhdGUobWF0Y2hlZF9uYW1lID0gc3RyX21hdGNoKE1ham9yLCcuKkRBVEEuKnwuKlNUQVRJU1RJQ1MuKicpKSAlPiUNCiAgICBzZWxlY3QoTWFqb3IsbWF0Y2hlZF9uYW1lKSAlPiUNCiAgICBmaWx0ZXIoIWlzLm5hKG1hdGNoZWRfbmFtZSkpDQptYWpvcnNfc3ViDQpgYGANCiMjV3JpdGUgY29kZSB0aGF0IHRyYW5zZm9ybXMgdGhlIGRhdGEgYmVsb3c6DQojY3JlYXRlIHRoZSB2ZWN0b3Igb2YgdGhlIERBVEENCmBgYHtyfQ0KZnJ1aXRzIDwtIGMoImJlbGwgcGVwcGVyIiwiYmlsYmVycnkiLCAiYmxhY2tiZXJyeSIsImJsb29kIG9yYW5nZSIsImJsdWViZXJyeSIsIg0KICAgICAgICAgICAgY2FudGFsb3VwZSIsICJjaGlsaSBwZXBwZXIiLCAiY2xvdWRiZXJyeSIsICAiZWxkZXJiZXJyeSIsICJsaW1lIiwNCiAgICAgICAgICAgICJseWNoZWUiICwgIm11bGJlcnJ5IiAsICJvbGl2ZSIsICJzYWxhbCBiZXJyeSIpDQpmcnVpdHMNCmBgYA0KDQoNCmBgYHtyfQ0KZnJ1aXRzIDwtIGMoImJlbGwgcGVwcGVyIiwiYmlsYmVycnkiLCAiYmxhY2tiZXJyeSIsImJsb29kIG9yYW5nZSIsImJsdWViZXJyeSIsDQogICAgICAgICAgICAiY2FudGFsb3VwZSIsICJjaGlsaSBwZXBwZXIiLCAiY2xvdWRiZXJyeSIsICAiZWxkZXJiZXJyeSIsICJsaW1lIiwgDQogICAgICAgICAgICAibHljaGVlIiAsICJtdWxiZXJyeSIgLCAib2xpdmUiLCAic2FsYWwg
YmVycnkiKQ0KDQpgYGANCg0KDQpgYGB7cn0NCmNhdChwYXN0ZTAoImMoIixwYXN0ZTAoc2VwID0gJyInLGZydWl0cywgY29sbGFwc2UgPSAnLCAnLCBzZXA9JyInKSxwYXN0ZSgiKSIpKSkNCmBgYA0KIzMgRGVzY3JpYmUsIGluIHdvcmRzLCB3aGF0IHRoZXNlIGV4cHJlc3Npb25zIHdpbGwgbWF0Y2g6DQoNCiguKVwxXDENCuKAnCguKSguKVxcMlxcMeKAnQ0KKC4uKVwxDQrigJwoLikuXFwxLlxcMeKAnQ0K4oCcKC4pKC4pKC4pLipcXDNcXDJcXDHigJ0NCg0KDQojI0Fuc3dlcjoNCg0KKC4pXDFcMSAtIEFueSB0d28gY2hhcmFjdGVycyB0aGF0IHJlcGVhdHMgaW4gdGhlIHJldmVyc2Ugb3JkZXIuDQrigJwoLikoLilcXDJcXDHigJ0gLSBBbnkgdHdvIGNoYXJhY3RlcnMgdGhhdCByZXBlYXRzIGluIHRoZSBzYW1lIG9yZGVyLg0K4oCcKC4pLlwxLlwx4oCdIC0gQW55IHNpbmdsZSBjaGFyYWN0ZXIgdGhhdCByZXBlYXRzIHR3byBtb3JlIHRpbWVzLCB3aXRoIGVhY2ggcmVwZXRpdGlvbiBhZnRlciBhbm90aGVyIHNpbmdsZSB2YXJpYWJsZSBjaGFyYWN0ZXIuDQrigJwoLikoLikoLikuKlwzXDJcMeKAnSAtIEFueSB0aHJlZSBjaGFyYWN0ZXJzIHRoYXQgcmVwZWF0IGluIHRoZSByZXZlcnNlIG9yZGVyIGFmdGVyIGFueSBudW1iZXIgb2YgdmFyaWFibGUgY2hhcmFjdGVycy4NCg0KIzQgQ29uc3RydWN0IHJlZ3VsYXIgZXhwcmVzc2lvbnMgdG8gbWF0Y2ggd29yZHMgdGhhdDogU3RhcnQgYW5kIGVuZCB3aXRoIHRoZSBzYW1lIGNoYXJhY3Rlci4gQ29udGFpbiBhIHJlcGVhdGVkIHBhaXIgb2YgbGV0dGVycyAoZS5nLiDigJxjaHVyY2jigJ0gY29udGFpbnMg4oCcY2jigJ0gcmVwZWF0ZWQgdHdpY2UuKSBDb250YWluIG9uZSBsZXR0ZXIgcmVwZWF0ZWQgaW4gYXQgbGVhc3QgdGhyZWUgcGxhY2VzIChlLmcuIOKAnGVsZXZlbuKAnSBjb250YWlucyB0aHJlZSDigJxl4oCdcy4pDQoNCg0KIyNBX3JlZ2V4X2V4cHIxX1N0YXJ0IGFuZCBlbmQgd2l0aCB0aGUgc2FtZSBjaGFyYWN0ZXINCg0KYGBge3J9DQpkZi5uYW1lcyA8LWMoImFubmEiLCAiY2h1cmNoIiwgImJvYiIsICJoYXJyeSIsInBhdWwiLCAiZWxldmVuIiwgImJ1YmJsZSIpDQpyZWdleF9leHByMSA9Il4oLikoKC4qXFwxJCl8XFwxPyQpIg0Kc3RyX3N1YnNldChkZi5uYW1lcyxyZWdleF9leHByMSkNCmBgYA0KIyNCX3JlZ2V4X2V4cHIyX0NvbnRhaW4gYSByZXBlYXRlZCBwYWlyIG9mIGxldHRlcnMNCmBgYHtyfQ0KcmVnZXhfZXhwcjIgPSAiKFtBLVphLXpdW0EtWmEtel0pLipcXDEiDQpzdHJfc3Vic2V0KGRmLm5hbWVzLHJlZ2V4X2V4cHIyKQ0KYGBgDQoNCiMjQ19yZWdleF9leHByM19Db250YWluIG9uZSBsZXR0ZXIgcmVwZWF0ZWQgaW4gYXQgbGVhc3QgdGhyZWUgcGxhY2VzDQoNCmBgYHtyfQ0KcmVnZXhfZXhwcjMgPSAiKFtBLVphLXpdKS4qXFwxLipcXDEiDQpzdHJfc3Vic2V0KGRmLm5hbWVzLHJlZ2V4X2V4cHIzICkNCmBgYA0K