# Load needed libraries
library(tidyverse)
library(RCurl)
library(knitr)
library(stringr)
# Download the college majors list from the FiveThirtyEight GitHub repository.
# Empty fields in the CSV are read in as NA (na.strings = "").
filename <- getURL(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
majors_list <- read.csv(text = filename, na.strings = "")
# Preview the first ten rows of the parsed table.
head(majors_list, n = 10)
# Case-insensitive search of the Major column; value = TRUE returns the
# matching major names themselves rather than their positions.
grep("Data", majors_list$Major, ignore.case = TRUE, value = TRUE)
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
grep("statistics", majors_list$Major, ignore.case = TRUE, value = TRUE)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
# Download the raw veggies text file and parse it into a data frame with no
# header row; quote = "" turns off quote handling while reading.
filename <- getURL(
  "https://raw.githubusercontent.com/audiorunner13/Masters-Coursework/main/DATA607%20Spring%202021/Week3/Data/veggies.txt"
)
berry_veg <- read.delim(text = filename, header = FALSE, quote = "")
# Print the first column (V1) of the parsed data.
berry_veg[["V1"]]
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# Build the literal text of an R character vector, c("a", "b", ...), from the
# V1 column. Each name is wrapped in double quotes and the quoted names are
# joined with ", ". Doing this explicitly replaces the earlier call that passed
# the whole data frame to str_c(), which produced this text only through an
# implicit coercion and raised a "not an atomic vector" warning.
quoted_names <- str_c("\"", berry_veg$V1, "\"")
berry_veg_vec <- str_c("c(", str_c(quoted_names, collapse = ", "), ")")
writeLines(berry_veg_vec)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
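The call below is left commented out because it raises an error: the pattern is not enclosed in quotes, so R cannot parse it. Written correctly as "(.)\\1\\1", it would match any character repeated three times in a row (e.g. "aaa").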
# str_view(words, (.)\1\1, match = TRUE)
Find all words that contain a pair of characters followed by the same pair in reverse order (e.g. "abba")
# Pattern: two captured characters followed by the same two in reverse order.
str_view(string = words, pattern = "(.)(.)\\2\\1", match = TRUE)
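The call below is left commented out because it raises an error: the pattern is not enclosed in quotes, so R cannot parse it. Written correctly as "(..)\\1", it would match a pair of characters that is immediately repeated (e.g. "abab").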
#str_view(words,(..)\1, match = TRUE)
Find all words that contain the same character three times, with exactly one other character between each occurrence (e.g. "abaca")
# Pattern: a captured character, any character, the captured character again,
# any character, and the captured character a third time.
str_view(string = words, pattern = "(.).\\1.\\1", match = TRUE)
Find all words that contain three characters followed, after zero or more other characters, by the same three characters in reverse order (e.g. "abc...cba")
# Pattern: three captured characters, then any run of characters (possibly
# empty), then the three captured characters in reverse order.
str_view(string = words, pattern = "(.)(.)(.).*\\3\\2\\1", match = TRUE)
Start and end with the same character
# Capture the first character and require the same character at the end of the
# word through the backreference \\1. The trailing group is optional so that
# one-character words also count. The earlier pattern "^a.*a$" was hard-coded
# to the letter "a" and missed every other word that starts and ends with the
# same character.
same_ends <- "^(.)(.*\\1)?$"
words[str_detect(words, same_ends)]
# The result still contains "america" and "area", plus every other word whose
# first and last characters are the same.
str_view(words, same_ends, match = TRUE)
Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
# Two sample words that each contain a repeated pair: "ch" in "church" and
# "em" in "remember".
x <- c("church", "remember")
# Capture any two characters and look for the same pair again after zero or
# more characters. The earlier pattern "(..)(..)\\1" demanded exactly two
# characters between the repeats, so it missed "remember", where "em" repeats
# immediately.
str_view(x, "(..).*\\1", match = TRUE)
Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)
# Sample words: "eleven", "twentieth" and "ninconponop" each have a letter that
# occurs at least three times; "twelve" does not and should be filtered out.
test_word <- c("eleven", "twelve", "twentieth", "ninconponop")
# Capture one character and require it to appear twice more, with any number
# of characters in between. The earlier pattern "(.)(.)(.).+\\1" only checked
# that a character recurs once after a gap, so it would also accept words in
# which a letter appears just twice.
str_view(test_word, "(.).*\\1.*\\1", match = TRUE)