This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Load FiveThirtyEight's all-ages college-majors table straight from GitHub;
# read_csv() reports the column specification it guessed as a message.
collage_major <- read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/all-ages.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## Major_code = col_double(),
## Major = col_character(),
## Major_category = col_character(),
## Total = col_double(),
## Employed = col_double(),
## Employed_full_time_year_round = col_double(),
## Unemployed = col_double(),
## Unemployment_rate = col_double(),
## Median = col_double(),
## P25th = col_double(),
## P75th = col_double()
## )
# Preview the imported tibble (first rows plus a summary of the hidden columns).
print(collage_major)
## # A tibble: 173 x 11
## Major_code Major Major_category Total Employed Employed_full_t~ Unemployed
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1100 GENERA~ Agriculture &~ 128148 90245 74078 2423
## 2 1101 AGRICU~ Agriculture &~ 95326 76865 64240 2266
## 3 1102 AGRICU~ Agriculture &~ 33955 26321 22810 821
## 4 1103 ANIMAL~ Agriculture &~ 103549 81177 64937 3619
## 5 1104 FOOD S~ Agriculture &~ 24280 17281 12722 894
## 6 1105 PLANT ~ Agriculture &~ 79409 63043 51077 2070
## 7 1106 SOIL S~ Agriculture &~ 6586 4926 4042 264
## 8 1199 MISCEL~ Agriculture &~ 8549 6392 5074 261
## 9 1301 ENVIRO~ Biology & Lif~ 106106 87602 65238 4736
## 10 1302 FOREST~ Agriculture &~ 69447 48228 39613 2144
## # ... with 163 more rows, and 4 more variables: Unemployment_rate <dbl>,
## # Median <dbl>, P25th <dbl>, P75th <dbl>
#1 Code that identifies the majors that contain either “DATA” or “STATISTICS”
library("stringr")
# Titles of every major whose name contains the substring "DATA".
major1 <- collage_major %>%
  pull(Major) %>%
  str_subset("DATA")
print(major1)
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
# Replace the title vector with the complete rows for those same majors.
major1 <- collage_major[str_detect(collage_major[["Major"]], "DATA"), ]
print(major1)
## # A tibble: 1 x 11
## Major_code Major Major_category Total Employed Employed_full_t~ Unemployed
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2101 COMPUTE~ Computers & Ma~ 29317 22828 18747 2265
## # ... with 4 more variables: Unemployment_rate <dbl>, Median <dbl>,
## # P25th <dbl>, P75th <dbl>
# Titles of every major whose name contains the substring "STATISTICS".
major2 <- collage_major %>%
  pull(Major) %>%
  str_subset("STATISTICS")
print(major2)
## [1] "STATISTICS AND DECISION SCIENCE"
## [2] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
# Replace the title vector with the complete rows for those same majors.
major2 <- collage_major[str_detect(collage_major[["Major"]], "STATISTICS"), ]
print(major2)
## # A tibble: 2 x 11
## Major_code Major Major_category Total Employed Employed_full_t~ Unemployed
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 3702 STATIS~ Computers & Ma~ 24806 18808 14468 1138
## 2 6212 MANAGE~ Business 156673 134478 118249 6186
## # ... with 4 more variables: Unemployment_rate <dbl>, Median <dbl>,
## # P25th <dbl>, P75th <dbl>
#2 Write code that transforms the data below:
# Build four fruit vectors, wrap every element in literal double quotes, and
# finally collapse all of them into a single comma-separated string.
fruit_category_1 <- c("bell pepper", "bilberry", "blackberry", "blood orange")
fruit_category_2 <- c("blueberry", "cantaloupe", "chili pepper", "cloudberry")
fruit_category_3 <- c("elderberry", "lime", "lychee", "mulberry")
# Typo fix: the fruit is "salal berry" (previously spelled "lsalal berry").
fruit_category_4 <- c("olive", "salal berry")

# Surround each element of a character vector with double-quote characters.
# x: character vector. Returns a character vector of the same length.
add_quotes <- function(x) {
  str_c("\"", x, "\"")
}

# Raw strings before any quoting is applied.
writeLines(fruit_category_1)
## bell pepper
## bilberry
## blackberry
## blood orange
fruit_category_1 <- add_quotes(fruit_category_1)
writeLines(fruit_category_1)
## "bell pepper"
## "bilberry"
## "blackberry"
## "blood orange"
fruit_category_2 <- add_quotes(fruit_category_2)
writeLines(fruit_category_2)
## "blueberry"
## "cantaloupe"
## "chili pepper"
## "cloudberry"
fruit_category_3 <- add_quotes(fruit_category_3)
writeLines(fruit_category_3)
## "elderberry"
## "lime"
## "lychee"
## "mulberry"
fruit_category_4 <- add_quotes(fruit_category_4)
writeLines(fruit_category_4)
## "olive"
## "salal berry"
# Join the quoted elements of all four vectors into one string.
x <- str_c(
  c(fruit_category_1, fruit_category_2, fruit_category_3, fruit_category_4),
  collapse = ", "
)
writeLines(x)
## "bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry"
#3 Describe, in words, what these expressions will match:
# Inside an R string literal "\1" is the octal escape for the control
# character U+0001, not a regex backreference. The pattern therefore means
# "any character followed by two U+0001 characters" and matches none of these
# strings. (A backreference has to be written with a doubled backslash, as in
# the patterns further down.)
str_view(
  string = c("abbbbc", "a.c", "a*c", "a c"),
  pattern = "(.)\1\1"
)
# Two characters immediately followed by the same two characters in reverse
# order, i.e. a four-character palindrome such as "abba".
str_view(
  string = c("grrgff", "a.ccc.c", "a*aa*c", "a c"),
  pattern = "(.)(.)\\2\\1"
)
# Same situation as the first pattern: "\1" is the character U+0001, so this
# is "any two characters followed by U+0001" and matches none of these strings.
str_view(
  string = c("grrrrgff", "a.ccc.c.c", "a*aaaa*c", "a c"),
  pattern = "(..)\1"
)
# A character, then any one character, then the first character again, then
# any one character, then the first character a third time (e.g. "abaca").
str_view(
  string = c("grrrrgff", "a.c2c3c.c.c", "a*adafaa*c"),
  pattern = "(.).\\1.\\1"
)
# Three characters, followed by zero or more arbitrary characters, followed by
# the same three characters in reverse order (e.g. "abc...cba").
str_view(
  string = c("lmnx*xmnlff", "a.uzmac2c3camzu.c.c", "a*adafaa*c"),
  pattern = "(.)(.)(.).*\\3\\2\\1"
)
#4 Construct regular expressions to match words that:
# Start and end with the same character.
# ^(.)(.*\\1)?$
# The ^ and $ anchors tie the captured character to the first and last
# position of the whole string; without them any substring that begins and
# ends alike (e.g. "a*adafaa" inside "a*adafaa*c") would be reported.
# The optional group lets a one-character word qualify as well.
str_view(c("lmnx*xmnl", "a.uzmac2c3camzu.c.a", "a*adafaa*c"), "^(.)(.*\\1)?$")
# Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice.)
# ([[:alpha:]]{2}).*\\1
# One group captures two consecutive letters and \\1 requires that same pair
# to appear again later. [[:alpha:]] is used instead of "." so that digits,
# punctuation and spaces are not counted as letters.
str_view(c("lmnx*xmnl", "a.uzmac2c3camzu.c.a", "churchac"), "([[:alpha:]]{2}).*\\1")
# Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s.)
# ([[:alpha:]]).*\\1.*\\1
# Each \\1 must occur exactly once here, giving three occurrences in total with
# anything in between. The previous form used "\\1*", which allows zero
# repetitions, so a letter appearing only twice (e.g. "l" in "lmnx*xmnl")
# was wrongly matched.
str_view(c("lmnx*xmnl", "eleven", "churchac"), "([[:alpha:]]).*\\1.*\\1")