This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(stringr)
# I uploaded the csv dataset to my github where I created a repository for the dataset. I then uploaded the dataset to RStudio.
theUrl <- "https://raw.githubusercontent.com/enidroman/data607_acquisition_and_managent/main/majors-list.csv"
college_major_list <- read.table(file=theUrl, header=TRUE, sep=",")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## number of items read is not a multiple of the number of columns
head(college_major_list)
## FOD1P Major Major_Category
## 1 1100 GENERAL AGRICULTURE Agriculture & Natural Resources
## 2 1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources
## 3 1102 AGRICULTURAL ECONOMICS Agriculture & Natural Resources
## 4 1103 ANIMAL SCIENCES Agriculture & Natural Resources
## 5 1104 FOOD SCIENCE Agriculture & Natural Resources
## 6 1105 PLANT SCIENCE AND AGRONOMY Agriculture & Natural Resources
# I did a grep() function to search for matches of a pattern within each element of the given string.
data_stat <- college_major_list
grep("DATA|STATISTICS", college_major_list$Major, value = TRUE)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"
## [3] "STATISTICS AND DECISION SCIENCE"
# I created a dataframe for the input provide above.
fruit_veg <- ('[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"')
fruit_veg
## [1] "[1] \"bell pepper\" \"bilberry\" \"blackberry\" \"blood orange\"\n[5] \"blueberry\" \"cantaloupe\" \"chili pepper\" \"cloudberry\" \n[9] \"elderberry\" \"lime\" \"lychee\" \"mulberry\" \n[13] \"olive\" \"salal berry\""
# I used the str_extract_all function to remove the backslash and the space before the second quotation from the begining of the string to the end.
new_fruit_veg <- str_extract_all(fruit_veg, "\\w[a-z]+\\s?[a-z]+\\w")
new_fruit_veg
## [[1]]
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# I used a collapse function to combine several cases into single lines.
new_fruit_veg <- str_c(new_fruit_veg, collapse = ", ")
## Warning in stri_c(..., sep = sep, collapse = collapse, ignore_null = TRUE):
## argument is not an atomic vector; coercing
new_fruit_veg
## [1] "c(\"bell pepper\", \"bilberry\", \"blackberry\", \"blood orange\", \"blueberry\", \"cantaloupe\", \"chili pepper\", \"cloudberry\", \"elderberry\", \"lime\", \"lychee\", \"mulberry\", \"olive\", \"salal berry\")"
# I used the writeLines function to write the sequence of multiple lines to the text file.
writeLines(new_fruit_veg)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
x <- c("abcdddef", "ggghijk", "lmnop")
str_view(x, "(.)\\1\\1", match = TRUE)
str_view_all(stringr::fruit, "(.)(.)\\2\\1", match = TRUE)
str_view_all(stringr::fruit, "(..)\\1", match = TRUE)
str_view(stringr::words, "(.).\\1.\\1", match = TRUE)
str_view(stringr::words, "(.)(.)(.).*\\3\\2\\1", match = TRUE)
str_subset(words, "^(.)((.*\\1$)|\\1?$)")
## [1] "a" "america" "area" "dad" "dead"
## [6] "depend" "educate" "else" "encourage" "engine"
## [11] "europe" "evidence" "example" "excuse" "exercise"
## [16] "expense" "experience" "eye" "health" "high"
## [21] "knock" "level" "local" "nation" "non"
## [26] "rather" "refer" "remember" "serious" "stairs"
## [31] "test" "tonight" "transport" "treat" "trust"
## [36] "window" "yesterday"
str_subset(words, "([A-Za-z][A-Za-z]).*\\1")
## [1] "appropriate" "church" "condition" "decide" "environment"
## [6] "london" "paragraph" "particular" "photograph" "prepare"
## [11] "pressure" "remember" "represent" "require" "sense"
## [16] "therefore" "understand" "whether"
str_subset(words, "([a-z]).*\\1.*\\1")
## [1] "appropriate" "available" "believe" "between" "business"
## [6] "degree" "difference" "discuss" "eleven" "environment"
## [11] "evidence" "exercise" "expense" "experience" "individual"
## [16] "paragraph" "receive" "remember" "represent" "telephone"
## [21] "therefore" "tomorrow"