library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Source: the "recent-grads" college-majors CSV in FiveThirtyEight's GitHub
# data repository.
url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/recent-grads.csv"
# Download the CSV from that address and parse it with readr.
college_majors <- read_csv(file = url)
## Rows: 173 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Major, Major_category
## dbl (19): Rank, Major_code, Total, Men, Women, ShareWomen, Sample_size, Empl...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows whose Major name mentions "DATA" or "STATISTICS";
# the match is case-insensitive.
filtered_majors <- filter(
  college_majors,
  grepl("DATA|STATISTICS", Major, ignore.case = TRUE)
)
# Display the matching rows.
filtered_majors
[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"
Into a format like this: c("bell pepper", "bilberry", "blackberry",
"blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry",
"elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
# Raw console printout of a character vector, held as ONE multi-line string.
# The literal is single-quoted so the embedded double quotes need no escaping.
fruit_data <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'
# Extract every double-quoted token directly. This ignores the "[n]" index
# markers, the spaces, and the line breaks without having to strip them first,
# and it does not depend on what characters a name contains (a filter such as
# "[a-z]" would silently drop an entry like "KIWI" or "7").
token_positions <- gregexpr('"[^"]*"', fruit_data)
new_fruit_vector <- regmatches(fruit_data, token_positions)[[1]]
# Remove the surrounding quote characters, leaving just the names.
new_fruit_vector <- gsub('"', "", new_fruit_vector, fixed = TRUE)
# Print the result
print(new_fruit_vector)
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# The same fruit names typed out directly as a character vector.
comp_data <- c(
  "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry",
  "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime",
  "lychee", "mulberry", "olive", "salal berry"
)
# Display the vector.
comp_data
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
(.)\1\1
: Matches three consecutive characters that
are all the same.
"(.)(.)\\2\\1"
: Matches any four character string
where the first and last characters are the same, and the middle two
characters are also the same.
(..)\1
: Matches any four character string where the
first two characters are the same as the last two characters.
"(.).\\1.\\1"
: Matches any five-character string
in which the first, third, and fifth characters are all the same
(e.g. "abaca").
"(.)(.)(.).*\\3\\2\\1"
: Matches any three characters,
followed by any number of characters (possibly none), followed by
those same three characters in reverse order (e.g. "abc-cba"). The
pattern is unanchored, so the match may occur anywhere in a string.
a.) Start and end with the same character: "^(.).*\\1$" b.) Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice): "(..).*\\1" c.) Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s): "(.).*\\1.*\\1"