Import your data

# Load the UFO sightings CSV from the shared data folder into a tibble.
ufo_sightings <- read_csv(file = "../00_data/ufo_sightings.csv")
## Rows: 60632 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Location.City, Location.State, Location.Country, Data.Shape, Data....
## dbl (11): Data.Encounter duration, Location.Coordinates.Latitude, Location.C...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Working subset for the string examples: character columns only,
# first 100 rows.
ufo_sightings_small <- ufo_sightings %>%
  select(where(is.character)) %>%
  head(n = 100)

Chapter 14

Tools

Detect matches

# Print the city column of the subset as a plain vector.
ufo_sightings_small[["Location.City"]]
##   [1] "anchor point" "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##   [6] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [11] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [16] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [21] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [26] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [31] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [36] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [41] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [46] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [51] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [56] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [61] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [66] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [71] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [76] "anchorage"    "anchorage"    "anchorage"    "anchorage"    "anchorage"   
##  [81] "anchorage"    "anchorage"    "anchorage"    "angoon"       "auke bay"    
##  [86] "auke bay"     "bethel"       "bethel"       "bethel"       "bethel"      
##  [91] "bethel"       "bethel"       "bethel"       "bethel"       "big lake"    
##  [96] "butte"        "chugiak"      "chugiak"      "clam gulch"   "cold bay"
# One logical per row: TRUE where the city matches the pattern "anchorage".
ufo_sightings_small %>%
  pull(Location.City) %>%
  str_detect("anchorage")
##   [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE
# Count the rows whose city matches "anchorage" (TRUE counts as 1).
# na.rm = TRUE: str_detect() returns NA for a missing city name, and a single
# NA would otherwise turn the whole total into NA.
sum(str_detect(ufo_sightings_small$Location.City, "anchorage"), na.rm = TRUE)
## [1] 82
# Same count as a dplyr summary, returned as a one-row tibble.
# na.rm = TRUE: str_detect() returns NA for a missing city name, and a single
# NA would otherwise make num_anchorage NA.
ufo_sightings_small %>%
  summarise(
    num_anchorage = sum(str_detect(Location.City, "anchorage"), na.rm = TRUE)
  )
## # A tibble: 1 × 1
##   num_anchorage
##           <int>
## 1            82

Extract matches

# Pull the matched text out of each city name.
# str_extract() gives the matched substring, or NA when the pattern is absent,
# so dropping the NA rows keeps only the cities that contain "anchorage".
ufo_sightings %>%
  select(Location.City) %>%
  mutate(col_anchorage = str_extract(Location.City, "anchorage")) %>%
  filter(!is.na(col_anchorage))
## # A tibble: 82 × 2
##    Location.City col_anchorage
##    <chr>         <chr>        
##  1 anchorage     anchorage    
##  2 anchorage     anchorage    
##  3 anchorage     anchorage    
##  4 anchorage     anchorage    
##  5 anchorage     anchorage    
##  6 anchorage     anchorage    
##  7 anchorage     anchorage    
##  8 anchorage     anchorage    
##  9 anchorage     anchorage    
## 10 anchorage     anchorage    
## # ℹ 72 more rows

Replacing matches

# Show each city next to a copy in which the first match of "anchorage" is
# replaced by "fairbanks"; cities without a match are carried over unchanged.
ufo_sightings %>%
  select(Location.City) %>%
  mutate(
    col_fairbanks = str_replace(Location.City, "anchorage", "fairbanks")
  )
## # A tibble: 60,632 × 2
##    Location.City col_fairbanks
##    <chr>         <chr>        
##  1 anchor point  anchor point 
##  2 anchorage     fairbanks    
##  3 anchorage     fairbanks    
##  4 anchorage     fairbanks    
##  5 anchorage     fairbanks    
##  6 anchorage     fairbanks    
##  7 anchorage     fairbanks    
##  8 anchorage     fairbanks    
##  9 anchorage     fairbanks    
## 10 anchorage     fairbanks    
## # ℹ 60,622 more rows