#https://rpubs.com/ericonsi/725341
library(openintro)
library(tinytex)
library(tidyverse)
library(dplyr)
library(stringr)

Homework 3: Working with Strings

Below are exercises related to the stringr package, which allows us to work easily and effectively with strings.

Exercise 1.

Using the 173 majors listed in fivethirtyeight.com’s College Majors dataset [https://fivethirtyeight.com/features/the-economic-guide-to-picking-a-college-major/], provide code that identifies the majors that contain either “DATA” or “STATISTICS”

# Exercise 1: list the majors whose name contains "DATA" or "STATISTICS".
library(stringr)
library(dplyr)

# The majors list is read straight from the FiveThirtyEight GitHub repository,
# so this step needs network access every time it runs.
dfMajors <- read.csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv",
  header = TRUE
)

# Inside filter() the column is referred to by its bare name: the data mask
# resolves `Major` against the data frame being filtered, whereas
# `dfMajors$Major` would always look at the full, original data frame.
x <- dfMajors %>%
  filter(str_detect(Major, "STATISTICS|DATA"))
x
##   FOD1P                                         Major          Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS                Business
## 2  2101      COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
## 3  3702               STATISTICS AND DECISION SCIENCE Computers & Mathematics

Exercise 2.

Write code that transforms the data below:

[1] “bell pepper” “bilberry” “blackberry” “blood orange”

[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”

[9] “elderberry” “lime” “lychee” “mulberry”

[13] “olive” “salal berry”

Into a format like this:

c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)

# Exercise 2: turn the printed fruit vector into the text of a c(...) call.
# Each vN is one row of the console output; single quotes keep the embedded
# double quotes literal.
v1 <- '"bell pepper"  "bilberry"     "blackberry"   "blood orange"'
v2 <- '"blueberry"    "cantaloupe"   "chili pepper" "cloudberry"'
v3 <- '"elderberry"   "lime"         "lychee"       "mulberry"'
v4 <- '"olive"        "salal berry"'

# Pipeline:
#   1. glue the four rows end to end (no separator),
#   2. delete every run of two or more spaces,
#   3. neighbouring items are now separated by nothing ("") or by one space
#      (" "); rewrite both separators as ", ",
#   4. wrap the result in "c(" ... ")".
vFruits <- str_c(v1, v2, v3, v4) %>%
  str_remove_all(" {2,}") %>%
  str_replace_all('\""|\" "', '\", "') %>%
  str_c("c(", ., ")")

writeLines(vFruits)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

Exercise 3.

Describe, in words, what these expressions will match:

# Sample inputs for the backreference patterns in this exercise.
# In "house\1\1" each single-backslash \1 is an R string escape (octal code 1),
# so that element ends in two control characters rather than in the text "\1".
words <- c(
  "house\1\1",
  "better",
  "Everest",
  "reviver",
  "remain",
  "lesser",
  "parallel",
  "eleven",
  "Yay!!!",
  "reread",
  "bookkeeper"
)
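  1. (.)\1\1 - Written this way inside an R string, each \1 is read as an escape sequence rather than as a backreference, so the pattern only matches text that contains those literal escaped characters. When the backslashes are properly escaped ((.)\\1\\1), it matches a three-character sequence in which every character is the same.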
# Not properly escaped: R converts each "\1" into a control character before
# the regex engine sees the pattern, so this is not a backreference.
a <- stringr::str_view(
  string = words,
  pattern = "(.)\1\1",
  match = TRUE
)
a

# Properly escaped: the doubled backslashes reach the regex engine as \1, so
# this matches one character followed by two more copies of it.
a1 <- stringr::str_view(
  string = words,
  pattern = "(.)\\1\\1",
  match = TRUE
)
a1
  1. (.)(.)\2\1 - This will match a sequence which is a four character palindrome.
# Two captured characters followed by the same two in reverse order,
# i.e. a four-character palindrome such as "abba".
b <- stringr::str_view(
  string = words,
  pattern = "(.)(.)\\2\\1",
  match = TRUE
)
b
  1. (..)\1 - Written this way inside an R string, \1 is read as an escape sequence rather than as a backreference, so the pattern only matches two characters followed by that literal escaped character. When the backslash is properly escaped ((..)\\1), it matches a four-character sequence in which a pair of characters is repeated.
# Not properly escaped: "\1" becomes a control character in the R string, so
# the pattern is "any two characters followed by that control character".
c <- stringr::str_view(
  string = words,
  pattern = "(..)\1",
  match = TRUE
)
c

# Properly escaped: a captured pair of characters immediately repeated.
c1 <- stringr::str_view(
  string = words,
  pattern = "(..)\\1",
  match = TRUE
)
c1
  1. (.).\1.\1 - This will match a five character sequence in which the first, third and fifth character are the same.
# Five characters in which positions 1, 3 and 5 hold the same character;
# positions 2 and 4 may be anything.
d <- stringr::str_view(
  string = words,
  pattern = "(.).\\1.\\1",
  match = TRUE
)
d
  1. (.)(.)(.).*\3\2\1 - This will match a character sequence of 6 or more in which the last three characters are a reverse of the first three.
# Three captured characters, any amount of text, then the same three
# characters in reverse order.
e <- stringr::str_view(
  string = words,
  pattern = "(.)(.)(.).*\\3\\2\\1",
  match = TRUE
)
e

Exercise 4.

# Sample inputs for the Exercise 4 patterns (replaces the Exercise 3 vector).
words <- c(
  "demented",
  "edified",
  "medicalization",
  "meelee",
  "perjurer",
  "parallel",
  "eleven",
  "reread",
  "bookkeeper"
)

Construct regular expressions to match words that:

  1. Start and end with the same character.

    ^(.).*\1$

# Anchored at both ends: capture the first character and require the same
# character again as the very last one.
a <- stringr::str_view(
  string = words,
  pattern = "^(.).*\\1$",
  match = TRUE
)
a
  1. Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice.)

    (.)(.).*\1\2

# Capture two adjacent characters and require the same pair, in the same
# order, to appear again later in the word.
b <- stringr::str_view(
  string = words,
  pattern = "(.)(.).*\\1\\2",
  match = TRUE
)
b
  1. Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

    (.).*\1.*\1

# Capture one character and require it twice more, with any amount of text
# (including none) between the occurrences.
c <- stringr::str_view(
  string = words,
  pattern = "(.).*\\1.*\\1",
  match = TRUE
)
c

Other useful things you can do with stringr that I have found

  1. Trim whitespace
# Trimming demo: the input is padded with spaces on both sides and
# str_trim() strips them from both ends.
string <- "             Please read carefully the following paragraph from wikipedia about the impending robot uprising:            "
s_trimmed <- str_trim(string = string, side = "both")
s_trimmed
## [1] "Please read carefully the following paragraph from wikipedia about the impending robot uprising:"
  1. Wrap strings into nice paragraphs
# Wrapping demo: reflow one long string into a paragraph at most 60 characters
# wide, with the first line indented by 10 spaces and the rest by 5.
string2 <- "An AI takeover is a hypothetical scenario in which artificial intelligence (AI) becomes the dominant form of intelligence on Earth, with computer programs or robots effectively taking the control of the planet away from the human species. Possible scenarios include replacement of the entire human workforce, takeover by a superintelligent AI, and the popular notion of a robot uprising.  -Wikipedia"

# Store the wrapped text first and print it in a second step. cat() returns
# NULL, so assigning the result of cat() would leave s_paragraph holding NULL
# instead of the paragraph.
s_paragraph <- str_wrap(string2, width = 60, indent = 10, exdent = 5)
cat(s_paragraph, "\n")
##           An AI takeover is a hypothetical scenario in which
##      artificial intelligence (AI) becomes the dominant form
##      of intelligence on Earth, with computer programs or
##      robots effectively taking the control of the planet
##      away from the human species. Possible scenarios include
##      replacement of the entire human workforce, takeover
##      by a superintelligent AI, and the popular notion of a
##      robot uprising. -Wikipedia
  1. Repeat a string several times
# Repetition demo: str_dup() concatenates `times` copies of the string; the
# padding spaces inside the literal keep the copies visually separated.
string3 <- "  The robot uprising is upon us! Help!!  "
str_dup(string = string3, times = 6)
## [1] "  The robot uprising is upon us! Help!!    The robot uprising is upon us! Help!!    The robot uprising is upon us! Help!!    The robot uprising is upon us! Help!!    The robot uprising is upon us! Help!!    The robot uprising is upon us! Help!!  "
  1. Here are the techniques used all together
# All three techniques together: repeat the string three times, trim the
# outer padding, then wrap so that each copy lands on its own indented line.
# The text is built and stored before printing: cat() returns NULL, so
# wrapping the cat() call in str_trim() would trim nothing and would replace
# string3 with the result of trimming NULL instead of the formatted text.
string3 <- str_wrap(
  str_trim(str_dup(string3, times = 3)),
  width = 44,
  indent = 5,
  exdent = 5
)
cat(string3, "\n")
##      The robot uprising is upon us! Help!!
##      The robot uprising is upon us! Help!!
##      The robot uprising is upon us! Help!!

Final thoughts

Stringr is a handy way to do a lot with little code.