[https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either “DATA” or “STATISTICS”.
library(RCurl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::complete() masks RCurl::complete()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the raw CSV of the 173 majors listed in fivethirtyeight.com's
# College Majors dataset (read directly from GitHub, so network access is
# required).
College_majors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
# Identify the majors that contain either "DATA" or "STATISTICS".
# str_detect() is case-sensitive here, so only upper-case occurrences in the
# Major column are matched.
Idenfied_majors <- College_majors %>%
  filter(str_detect(Major, "DATA|STATISTICS"))
# Display the identified majors
Idenfied_majors
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Into a format like this:
c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
## "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry"
## Length Class Mode
## 14 character character
• (.)\1\1 : This expression matches any character repeated three times in a row
# Pattern (.)\1\1: one captured character immediately followed by two more
# copies of itself. str_detect() returns one logical value per element of
# String_data.
matches <- str_detect(
  string = String_data,
  pattern = "(.)\\1\\1"
)
# Keep only the elements that were flagged TRUE
String_data[matches]
## character(0)
• “(.)(.)\2\1”: This expression matches two characters followed by the same two characters in reverse order
# Pattern (.)(.)\2\1: two captured characters followed by the same two
# characters in reverse order (a four-character palindrome such as "eppe").
matches <- String_data %>%
  str_detect("(.)(.)\\2\\1")
# Show the strings in which the pattern was found
String_data[matches]
## [1] "bell pepper" "chili pepper"
• (..)\1 : This expression matches any two characters immediately followed by the same two characters in the same order, e.g. “abab” (but not “abba”)
# Check for matches of the pattern (..)\1 in each string.
# The backslash must be doubled inside an R string literal: "\\1" reaches the
# regex engine as the backreference \1, whereas a single-backslash "\1" is
# parsed by R as the control character U+0001 and can never act as a
# backreference.
matches <- str_detect(String_data, "(..)\\1")
# Display the matches
String_data[matches]
# NOTE(review): the output below assumes String_data is the 14-fruit vector
# shown earlier ("salal" contains "alal") - re-knit to confirm.
## [1] "salal berry"
• “(.).\1.\1” : This expression matches a character repeated three times with characters in between each repetition, e.g. abaca
# Pattern (.).\1.\1: a captured character that reappears at every second
# position, three times in total, with any single character in between
# (e.g. "abaca").
matches <- str_detect(
  string = String_data,
  pattern = "(.).\\1.\\1"
)
# Keep only the elements that were flagged TRUE
String_data[matches]
## character(0)
• “(.)(.)(.).*\3\2\1” : This expression matches three characters, followed by any characters repeated 0 or more times, and then the same three characters in reverse order. E.g. “abc312131cba”, “abccba”
# Pattern (.)(.)(.).*\3\2\1: three captured characters, then any run of
# characters (possibly empty), then the same three characters reversed.
matches <- String_data %>%
  str_detect("(.)(.)(.).*\\3\\2\\1")
# Show the strings in which the pattern was found
String_data[matches]
## character(0)
• Start and end with the same character.
# Sample vocabulary for the exercises: words in the category of jewelries
Jewelries <- c(
  "studs", "necklaces", "bracelets", "rings", "pendant", "earrings", "anklets",
  "watches", "brooches", "cufflinks", "chains", "headpieces", "bangles", "hoops"
)
# Words that start and end with the same character.
#   ^(.)     captures the first character
#   .*\1$    the string ends with that same character
#   \1?$     fallback for a one-character string (or the character doubled)
similarities_a <- Jewelries %>%
  str_subset("^(.)((.*\\1$)|\\1?$)")
similarities_a
## [1] "studs"
• Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
# Words containing some pair of characters that occurs again later in the
# word: (..) captures the pair, .* skips ahead, \1 requires it again.
similarities_b <- Jewelries %>%
  str_subset("(..).*\\1")
similarities_b
## character(0)
• Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s).
# Words in which one character appears at least three times: (.) captures
# it, and each .*\1 requires one further occurrence later in the word.
similarities_c <- str_subset(
  string = Jewelries,
  pattern = "(.).*\\1.*\\1"
)
similarities_c
## [1] "headpieces"