# Packages used throughout this script: stringr supplies the str_*() helpers
# (and re-exports the %>% pipe); glue supplies glue_collapse().
library(stringr)
library(glue)

# Only switch to the author's course folder when it actually exists, so the
# script does not abort with "cannot change working directory" on any other
# machine. Nothing in this section reads or writes local files.
course_dir <- "C:/Users/malia/OneDrive/Desktop/MSDS DATA 607"
if (dir.exists(course_dir)) {
  setwd(course_dir)
}

# Download the FiveThirtyEight list of college majors and keep the `Major`
# column as a plain vector for the pattern searches below.
assigment3_dat <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv"
)
assigment3_dat1 <- assigment3_dat[["Major"]]
head(assigment3_dat1)
## [1] "GENERAL AGRICULTURE"
## [2] "AGRICULTURE PRODUCTION AND MANAGEMENT"
## [3] "AGRICULTURAL ECONOMICS"
## [4] "ANIMAL SCIENCES"
## [5] "FOOD SCIENCE"
## [6] "PLANT SCIENCE AND AGRONOMY"
# Keep only the majors whose name contains the text "DATA".
assigment3_dat1[str_detect(string = assigment3_dat1, pattern = "DATA")]
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
# Keep only the majors whose name contains the text "STATISTICS".
assigment3_dat1[str_detect(string = assigment3_dat1, pattern = "STATISTICS")]
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
# Task: transform a character vector that prints like this:
#   [1] "bell pepper" "bilberry" "blackberry" "blood orange"
#   [5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
#   [9] "elderberry" "lime" "lychee" "mulberry"
#   [13] "olive" "salal berry"
# into a format like this:
#   c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
# Raw text of the printed fruit vector, including the "[n]" index markers that
# R adds when it prints a character vector.
FRUITS1 <- '[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"'

# Pull out every double-quoted name, quotes included. Matching only the quoted
# tokens skips the "[n]" markers, the line breaks and any uneven spacing in a
# single step, so no separate clean-up passes are needed afterwards.
FRUITS1 <- str_extract_all(FRUITS1, '"[^"]+"')[[1]]

# Join the quoted names with ", " and wrap them in c( ) so the result is the
# text of an R vector literal.
Fruitlist1 <- str_c("c(", str_c(FRUITS1, collapse = ", "), ")")

# writeLines() shows the string exactly as built; print() would display the
# embedded double quotes as \".
writeLines(Fruitlist1)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
### Start and end with the same character.
word_list <- c("Apple", "LIL", "Orange", "Blue", "ELLE")
# ^(.) captures the first character. The optional group (.*\\1)? requires the
# string to end with that same character, while still letting a one-character
# string match (its only character is both the first and the last).
# Matching is case-sensitive, so "Apple" is not selected.
same_char <- "^(.)(.*\\1)?$"
word_list %>%
  str_subset(same_char)
## [1] "LIL" "ELLE"
# Words in which a single character occurs at least three times.
repeated_thrice <- c("eleven", "committee", "Emimtmt")
# (.) captures a character and each \\1 requires it again further along; the
# .* between them allows any gap (including none), so the three occurrences
# do not have to be exactly one character apart.
slicing <- "(.).*\\1.*\\1"
repeated_thrice %>%
  str_subset(slicing)
## [1] "eleven" "Emimtmt"
# Words that contain the same two-character sequence twice
# (e.g. "ch" in "church", "pa" in "papa").
repeated_twice <- c("church", "papa", "apple")
# (..) captures two consecutive characters as one group and \\1 requires that
# whole pair to appear again later, so a single repeated character on its own
# is not enough to match.
slicing1 <- "(..).*\\1"
repeated_twice %>%
  str_subset(slicing1)
## [1] "church" "papa"