Homework #3 607

Libraries Needed

library(readr)
library(stringr)

Question 1

The majors whose names contain ‘Data’ or ‘Statistics’ are: Computer Programming and Data Processing; Management Information Systems and Statistics; and Statistics and Decision Science.

# Load the FiveThirtyEight college-majors list from GitHub.
majors_list <- read_csv(
  url(
    "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
  )
)

## Rows: 174 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): FOD1P, Major, Major_Category
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first six rows to check that the import worked.
head(majors_list, n = 6L)

## # A tibble: 6 × 3
##   FOD1P Major                                 Major_Category                 
##   <chr> <chr>                                 <chr>                          
## 1 1100  GENERAL AGRICULTURE                   Agriculture & Natural Resources
## 2 1101  AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3 1102  AGRICULTURAL ECONOMICS                Agriculture & Natural Resources
## 4 1103  ANIMAL SCIENCES                       Agriculture & Natural Resources
## 5 1104  FOOD SCIENCE                          Agriculture & Natural Resources
## 6 1105  PLANT SCIENCE AND AGRONOMY            Agriculture & Natural Resources

# Majors whose name contains "data", ignoring case.
grep("DATA", majors_list$Major, ignore.case = TRUE, value = TRUE)

## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"

# Majors whose name contains "statistics", ignoring case.
grep("Statistics", majors_list$Major, ignore.case = TRUE, value = TRUE)

## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"

Question 2

Starting with one messy string, we are going to separate it into individual values stored in a character vector.

# The raw input: printed console output of a character vector, pasted as one
# single string (index markers, quotes and line breaks included).
Original <- '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"

[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  

[9] "elderberry"   "lime"         "lychee"       "mulberry"    

[13] "olive"        "salal berry"'

# The target: one vector element per fruit name.
Desired <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange",
  "blueberry", "cantaloupe", "chili pepper", "cloudberry",
  "elderberry", "lime", "lychee", "mulberry",
  "olive", "salal berry"
)



# Pull out every fruit name. The two-word alternative is listed first so that
# names such as "bell pepper" are kept together; otherwise a single run of
# letters is taken. Digits, brackets and quotes never match.
Original <- str_extract_all(
  string = Original,
  pattern = "[[A-Za-z]]+\\s[[A-Za-z]]+|[[A-Za-z]]+"
)
# str_extract_all() returns a list; flatten it into a character vector.
Results <- unlist(Original)
print(Results)

##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"

# Show the target vector for a side-by-side comparison with Results.
print(Desired)

##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"

# Check that the extracted vector matches the target exactly.
identical(x = Results, y = Desired)

## [1] TRUE

Question 3

# Words in which the same character appears three times in a row.
# The backreference has to be written "\\1" inside an R string: a bare "\1" is
# parsed as the control character U+0001, so the pattern would search for that
# character instead of repeating the captured group.
str_subset(words, "(.)\\1\\1")

## character(0)

# Words containing two characters followed by the same two characters in
# reverse order (an "abba" shape), e.g. the "otto" in "bottom".
str_subset(string = words, pattern = "(.)(.)\\2\\1")

##  [1] "afternoon"   "apparent"    "arrange"     "bottom"      "brilliant"  
##  [6] "common"      "difficult"   "effect"      "follow"      "indeed"     
## [11] "letter"      "million"     "opportunity" "oppose"      "tomorrow"

# Words containing a pair of characters immediately followed by the same pair,
# e.g. the "emem" in "remember".
# The backreference has to be written "\\1" inside an R string: a bare "\1" is
# parsed as the control character U+0001 and would never match these words.
str_subset(words, "(..)\\1")

## [1] "remember"

# Words in which one character occurs three times with exactly one character
# between each occurrence, e.g. the e's in "eleven".
str_subset(string = words, pattern = "(.).\\1.\\1")

## [1] "eleven"

# Words containing three characters and, later in the word, the same three
# characters in reverse order, e.g. "par" ... "rap" in "paragraph".
str_subset(string = words, pattern = "(.)(.)(.).*\\3\\2\\1")

## [1] "paragraph"

Question 4

# Words that start and end with the same character. The second alternative,
# "\\1?$", also accepts one-character words such as "a".
str_subset(string = words, pattern = "^(.)((.*\\1$)|\\1?$)")

##  [1] "a"          "america"    "area"       "dad"        "dead"      
##  [6] "depend"     "educate"    "else"       "encourage"  "engine"    
## [11] "europe"     "evidence"   "example"    "excuse"     "exercise"  
## [16] "expense"    "experience" "eye"        "health"     "high"      
## [21] "knock"      "level"      "local"      "nation"     "non"       
## [26] "rather"     "refer"      "remember"   "serious"    "stairs"    
## [31] "test"       "tonight"    "transport"  "treat"      "trust"     
## [36] "window"     "yesterday"

# Contains a pair of letters that occurs again later in the word, e.g. the
# "ch" at both ends of "church". Try the pattern on that one example first.
str_subset(string = "church", pattern = "([A-Za-z][A-Za-z]).*\\1")

## [1] "church"

# Apply the repeated-pair pattern to the whole word list.
str_subset(string = words, pattern = "([A-Za-z][A-Za-z]).*\\1")

##  [1] "appropriate" "church"      "condition"   "decide"      "environment"
##  [6] "london"      "paragraph"   "particular"  "photograph"  "prepare"    
## [11] "pressure"    "remember"    "represent"   "require"     "sense"      
## [16] "therefore"   "understand"  "whether"

# Contains one lowercase letter in at least three places, e.g. the three e's
# in "eleven". Try the pattern on that one example first.
str_subset(string = "eleven", pattern = "([a-z]).*\\1.*\\1")

## [1] "eleven"

# Apply the letter-in-three-places pattern to the whole word list.
str_subset(string = words, pattern = "([a-z]).*\\1.*\\1")

##  [1] "appropriate" "available"   "believe"     "between"     "business"   
##  [6] "degree"      "difference"  "discuss"     "eleven"      "environment"
## [11] "evidence"    "exercise"    "expense"     "experience"  "individual" 
## [16] "paragraph"   "receive"     "remember"    "represent"   "telephone"  
## [21] "therefore"   "tomorrow"