Import your data

    # Read the Excel file at the relative path below into `data`.
    data <- read_excel(path = "../00_data/us_avg_tuition (1).xlsx")

Chapter 14

Tools

Detect matches

# Print the State column as a plain character vector.
data[["State"]]
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
# Flag, element by element, which state names contain "Connecticut".
data$State %>%
    str_detect("Connecticut")
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE
# Count the matches: summing the logical vector tallies its TRUE values.
data$State %>%
    str_detect("Connecticut") %>%
    sum()
## [1] 1
# Same count, computed inside summarise() so the result comes back as a
# one-row table with a single num_Connecticut column.
summarise(
    data,
    num_Connecticut = sum(str_detect(string = State, pattern = "Connecticut"))
)
## # A tibble: 1 × 1
##   num_Connecticut
##             <int>
## 1               1

Extract matches

# Pull the matched text into its own column, then keep only the rows where
# a match was found (rows without a match carry NA in the new column).
data %>%
    mutate(
        col_Connecticut = str_extract(string = State, pattern = "Connecticut")
    ) %>%
    filter(!is.na(col_Connecticut)) %>%
    select(State, col_Connecticut)
## # A tibble: 1 × 2
##   State       col_Connecticut
##   <chr>       <chr>          
## 1 Connecticut Connecticut

Replacing matches

# Build a new column in which the text "Connecticut" is swapped for
# "Florida", and show it next to the original State values. Rows that do
# not contain the pattern are carried over unchanged.
data %>%
    mutate(col_Florida = str_replace(State, "Connecticut", "Florida")) %>%
    select(State, col_Florida)
## # A tibble: 50 × 2
##    State       col_Florida
##    <chr>       <chr>      
##  1 Alabama     Alabama    
##  2 Alaska      Alaska     
##  3 Arizona     Arizona    
##  4 Arkansas    Arkansas   
##  5 California  California 
##  6 Colorado    Colorado   
##  7 Connecticut Florida    
##  8 Delaware    Delaware   
##  9 Florida     Florida    
## 10 Georgia     Georgia    
## # ℹ 40 more rows