# Read the Excel file (path is relative to the working directory) into `data`.
data <- read_excel(path = "../00_data/us_avg_tuition (1).xlsx")
# Print the State column as a plain character vector.
data[["State"]]
## [1] "Alabama" "Alaska" "Arizona" "Arkansas"
## [5] "California" "Colorado" "Connecticut" "Delaware"
## [9] "Florida" "Georgia" "Hawaii" "Idaho"
## [13] "Illinois" "Indiana" "Iowa" "Kansas"
## [17] "Kentucky" "Louisiana" "Maine" "Maryland"
## [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
## [25] "Missouri" "Montana" "Nebraska" "Nevada"
## [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
## [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
## [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
## [41] "South Dakota" "Tennessee" "Texas" "Utah"
## [45] "Vermont" "Virginia" "Washington" "West Virginia"
## [49] "Wisconsin" "Wyoming"
# Flag, element by element, which State entries contain "Connecticut".
data$State %>%
  str_detect("Connecticut")
## [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE
# Count the matches: summing a logical vector counts its TRUE values.
data$State %>%
  str_detect("Connecticut") %>%
  sum()
## [1] 1
# Same count as above, returned as a one-row summary table.
summarise(
  data,
  num_Connecticut = State %>%
    str_detect("Connecticut") %>%
    sum()
)
## # A tibble: 1 × 1
## num_Connecticut
## <int>
## 1 1
# Keep State, add the extracted match (NA where the pattern is absent),
# then drop the rows that did not match.
data %>%
  select(State) %>%
  mutate(col_Connecticut = str_extract(State, "Connecticut")) %>%
  filter(!is.na(col_Connecticut))
## # A tibble: 1 × 2
## State col_Connecticut
## <chr> <chr>
## 1 Connecticut Connecticut
# Replace "Connecticut" with "Florida" in a new column, leaving every other
# state name unchanged, and show it next to the original State column.
# Fixes the earlier misspelling "Flordia" in both the replacement text and the
# column name; the result is not assigned, so nothing depends on the old name.
data %>%
  mutate(col_Florida = str_replace(State, "Connecticut", "Florida")) %>%
  select(State, col_Florida)
## # A tibble: 50 × 2
## State col_Florida
## <chr> <chr>
## 1 Alabama Alabama
## 2 Alaska Alaska
## 3 Arizona Arizona
## 4 Arkansas Arkansas
## 5 California California
## 6 Colorado Colorado
## 7 Connecticut Florida
## 8 Delaware Delaware
## 9 Florida Florida
## 10 Georgia Georgia
## # ℹ 40 more rows