# 1. Using the 173 majors listed in fivethirtyeight.com's College Majors dataset
# [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/],
# provide code that identifies the majors that contain either "DATA" or "STATISTICS".

# Load the majors list. read.csv() accepts a URL directly and already returns
# a data frame, so no url()/data.frame() wrapping is needed.
college_majors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)

# value = TRUE makes grep() return the matching major names themselves rather
# than their positions. Matching is case-sensitive.
data_majors <- grep("DATA", college_majors$Major, value = TRUE)
statistics_majors <- grep("STATISTICS", college_majors$Major, value = TRUE)

# Majors that contain either term, collected in a single vector (dataset order).
data_or_statistics_majors <- grep(
  "DATA|STATISTICS",
  college_majors$Major,
  value = TRUE
)

data_majors
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
statistics_majors
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"

# Prints the three majors listed above as one result.
data_or_statistics_majors

# 2. Write code that transforms the data below:
#   [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"
#   [5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"
#   [9] "elderberry"   "lime"         "lychee"       "mulberry"
#   [13] "olive"       "salal berry"
# into a format like this:
#   c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
#     "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
#     "lychee", "mulberry", "olive", "salal berry")

# The fruit vector exactly as the console printed it, held as one multi-line
# string: the [n] index markers and column padding are part of the text.
original_fruits <- '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"
[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  
[9] "elderberry"   "lime"         "lychee"       "mulberry"    
[13] "olive"        "salal berry"'

# Find every double-quoted token, then extract those tokens (quotes included).
quoted_token_hits <- gregexpr("\"[^\"]+\"", original_fruits)
revised_fruits <- regmatches(original_fruits, quoted_token_hits)[[1]]

# Drop the quote characters so only the bare fruit names remain.
final_fruits <- gsub("\"", "", revised_fruits)

print(final_fruits)
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"

# 3. Describe, in words, what these expressions will match:
#   (.)\1\1
#     The same character three times in a row (e.g. "aaa").
#   "(.)(.)\2\1"
#     Any two characters followed by the same two characters in reverse
#     order (e.g. "abba").
#   (..)\1
#     Any two characters immediately repeated (e.g. "abab").
#   "(.).\1.\1"
#     A single character that appears three times, with any one character
#     between each occurrence (e.g. "abaca").
#   "(.)(.)(.).*\3\2\1"
#     Three characters, then zero or more characters of any kind, then the
#     same three characters in reverse order (e.g. "abcxyzcba").

# 4. Construct regular expressions to match words that:
#   - Start and end with the same character.
#   - Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice).
#   - Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s).

# Backreference patterns tried against a small sample of words.
# str_match() returns one row per word: column 1 is the full match, column 2 is
# the captured group, and a word that does not match yields a row of NA.

# Install stringr only when it is missing, so re-running the script does not
# re-download the package and still works offline once it is installed.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr", repos = "http://cran.us.r-project.org")
}
library("stringr")

words <- c(
  "measurement", "gene", "bird", "procedure", "singer",
  "two", "power", "cabinet", "attitude", "magazine"
)

# Start and end with the same character. The tail is an optional non-capturing
# group so that a one-character word (e.g. "a") also matches; requiring
# ".*\\1" unconditionally would demand at least two characters.
same_start_and_end <- str_match(words, "^(.)(?:.*\\1)?$")
same_start_and_end
##       [,1] [,2]
##  [1,] NA   NA  
##  [2,] NA   NA  
##  [3,] NA   NA  
##  [4,] NA   NA  
##  [5,] NA   NA  
##  [6,] NA   NA  
##  [7,] NA   NA  
##  [8,] NA   NA  
##  [9,] NA   NA  
## [10,] NA   NA

# Contain a repeated pair of characters: capture two characters, allow
# anything in between, then require the same pair again.
repeated_letters <- str_match(words, "(..).*\\1")
repeated_letters
##       [,1]        [,2]
##  [1,] "measureme" "me"
##  [2,] NA          NA  
##  [3,] NA          NA  
##  [4,] NA          NA  
##  [5,] NA          NA  
##  [6,] NA          NA  
##  [7,] NA          NA  
##  [8,] NA          NA  
##  [9,] NA          NA  
## [10,] NA          NA

# Contain one character in at least three places: the captured character must
# occur twice more, with anything (or nothing) between the occurrences.
three_repeats <- str_match(words, "(.).*\\1.*\\1")
three_repeats
##       [,1]       [,2]
##  [1,] "easureme" "e" 
##  [2,] NA         NA  
##  [3,] NA         NA  
##  [4,] NA         NA  
##  [5,] NA         NA  
##  [6,] NA         NA  
##  [7,] NA         NA  
##  [8,] NA         NA  
##  [9,] "ttit"     "t" 
## [10,] NA         NA