#https://rpubs.com/ericonsi/725341
library(openintro)
library(tinytex)
library(tidyverse)
library(dplyr)
library(stringr)
Below are exercises related to the stringr package, which allows us to work easily and effectively with strings.
Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either “DATA” or “STATISTICS”
library(stringr)
library(dplyr)
# Read the FiveThirtyEight majors list, then keep only the rows whose Major
# text contains "STATISTICS" or "DATA" (regex alternation).
dfMajors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  header = TRUE
)
x <- dfMajors %>%
  filter(str_detect(Major, "STATISTICS|DATA"))
x
## FOD1P Major Major_Category
## 1 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
## 2 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3 3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics
Write code that transforms the data below:
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Into a format like this:
c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)
# The four printed rows of the fruit vector, each kept as one raw string of
# double-quoted names.
v1 <- '"bell pepper" "bilberry" "blackberry" "blood orange"'
v2 <- '"blueberry" "cantaloupe" "chili pepper" "cloudberry"'
v3 <- '"elderberry" "lime" "lychee" "mulberry"'
v4 <- '"olive" "salal berry"'

# Glue the rows together with no separator, so a row boundary shows up as two
# adjacent quote characters.
vFruits <- str_c(v1, v2, v3, v4, sep = "")
# Drop any run of two or more spaces.
vFruits <- str_replace_all(vFruits, " {2,}", "")
# Turn every boundary between two names (quote-quote or quote-space-quote)
# into quote-comma-space-quote.
vFruits <- str_replace_all(vFruits, '\""|\" "', '\", "')
# Wrap the comma-separated list in a c( ... ) call and show it without escapes.
vFruits <- str_c("c(", vFruits, ")")
writeLines(vFruits)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
Describe, in words, what these expressions will match:
# Sample words for the backreference demonstrations. The first entry ends in
# two literal control characters, because "\1" inside an R string is an octal
# escape rather than a regex backreference.
words <- c(
  "house\1\1", "better", "Everest", "reviver", "remain", "lesser",
  "parallel", "eleven", "Yay!!!", "reread", "bookkeeper"
)

# "(.)\1\1" (not properly escaped): any character followed by two literal
# \001 control characters -- not a backreference at all.
a <- str_view(words, "(.)\1\1", match = TRUE)
a

# "(.)\\1\\1": the same character three times in a row.
a1 <- str_view(words, "(.)\\1\\1", match = TRUE)
a1

# "(.)(.)\\2\\1": two characters followed by the same two in reverse order
# (an "abba" shape).
b <- str_view(words, "(.)(.)\\2\\1", match = TRUE)
b

# "(..)\1" (not properly escaped): any two characters followed by one literal
# \001 control character.
c <- str_view(words, "(..)\1", match = TRUE)
c

# "(..)\\1": a pair of characters immediately repeated (an "abab" shape).
c1 <- str_view(words, "(..)\\1", match = TRUE)
c1

# "(.).\\1.\\1": a character, any character, the first character again, any
# character, and the first character once more (an "abaca" shape).
d <- str_view(words, "(.).\\1.\\1", match = TRUE)
d

# "(.)(.)(.).*\\3\\2\\1": three characters, then anything (possibly nothing),
# then the same three characters in reverse order.
e <- str_view(words, "(.)(.)(.).*\\3\\2\\1", match = TRUE)
e
# Word list used to try out the regular expressions constructed below.
words <- c(
  "demented", "edified", "medicalization",
  "meelee", "perjurer", "parallel",
  "eleven", "reread", "bookkeeper"
)
Construct regular expressions to match words that:
Start and end with the same character.
^(.).*\1$
# Capture the first character, allow anything in between, and require the
# captured character again at the very end of the string.
a <- str_view(words, "^(.).*\\1$", match = TRUE)
a
Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)
(.)(.).*\1\2
# Capture two consecutive characters, allow anything in between, and require
# the same two characters in the same order later in the string.
b <- str_view(words, "(.)(.).*\\1\\2", match = TRUE)
b
Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)
(.).*\1.*\1
# Capture one character and require it twice more, with any amount of text
# (including none) between the occurrences.
c <- str_view(words, "(.).*\\1.*\\1", match = TRUE)
c
# Sentence padded with whitespace at both ends.
string <- " Please read carefully the following paragraph from wikipedia about the impending robot uprising: "
# Strip the leading and trailing whitespace and display the result.
s_trimmed <- str_trim(string, side = "both")
s_trimmed
## [1] "Please read carefully the following paragraph from wikipedia about the impending robot uprising:"
# Paragraph to be re-flowed into a fixed-width block.
string2 <- "An AI takeover is a hypothetical scenario in which artificial intelligence (AI) becomes the dominant form of intelligence on Earth, with computer programs or robots effectively taking the control of the planet away from the human species. Possible scenarios include replacement of the entire human workforce, takeover by a superintelligent AI, and the popular notion of a robot uprising. -Wikipedia"
# Wrap at 60 columns, indenting the first line by 10 spaces and the following
# lines by 5. The wrapped text is stored first and printed in a separate step:
# cat() returns NULL, so assigning its result would leave s_paragraph NULL
# instead of holding the paragraph.
s_paragraph <- str_wrap(string2, width = 60, indent = 10, exdent = 5)
cat(s_paragraph, "\n")
## An AI takeover is a hypothetical scenario in which
## artificial intelligence (AI) becomes the dominant form
## of intelligence on Earth, with computer programs or
## robots effectively taking the control of the planet
## away from the human species. Possible scenarios include
## replacement of the entire human workforce, takeover
## by a superintelligent AI, and the popular notion of a
## robot uprising. -Wikipedia
# Short warning message, padded with whitespace on both sides.
string3 <- " The robot uprising is upon us! Help!! "
# Repeat the message six times back to back and display the result.
str_dup(string3, times = 6)
## [1] " The robot uprising is upon us! Help!! The robot uprising is upon us! Help!! The robot uprising is upon us! Help!! The robot uprising is upon us! Help!! The robot uprising is upon us! Help!! The robot uprising is upon us! Help!! "
# Repeat the message three times, then wrap it at 44 columns with a 5-space
# indent on every line. The wrapped text is stored first and printed in a
# separate step: cat() returns NULL, so the original str_trim(cat(...)) trimmed
# NULL and string3 lost its text. The str_trim() call is dropped because it
# never received the text; the printed output is unchanged.
string3 <- str_wrap(
  str_dup(string3, times = 3),
  width = 44, indent = 5, exdent = 5
)
cat(string3, "\n")
## The robot uprising is upon us! Help!!
## The robot uprising is upon us! Help!!
## The robot uprising is upon us! Help!!
Stringr is a handy way to do a lot with little code.