Import your data

# excel file
airlines <- read_excel("../00_data/MyData.xlsx") %>%
     mutate(n_events = as.numeric(n_events)) %>%
    mutate(avail_seat_km_per_week = as.numeric(avail_seat_km_per_week))
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events

Chapter 14

Tools

Detect matches

airlines %>%
    summarise(sum(str_detect(year_range, "85_99$")))
## # A tibble: 1 × 1
##   `sum(str_detect(year_range, "85_99$"))`
##                                     <int>
## 1                                     168
str_detect(airlines$year_range, "85_99$")
##   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [97]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [109]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [157]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sum(str_detect(airlines$year_range, "85_99$"))
## [1] 168

Extract matches

airlines_1 <- c("Air Canada", "Air France", "Condor", "Korean Air", "Air Lingus", "Southwest Airlines")
airline_match <- str_c(airlines_1, collapse = "|")
airline_match
## [1] "Air Canada|Air France|Condor|Korean Air|Air Lingus|Southwest Airlines"
str_extract(airlines_1, "Air France")
## [1] NA           "Air France" NA           NA           NA          
## [6] NA

Replacing matches

airlines %>% mutate(n_events = n_events %>% str_replace("^[0]", "-"))
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>   <chr>  
##  1    NA Aer Lingus                         320906734 85_99      incide… 2      
##  2     2 Aeroflot*                         1197672318 85_99      incide… 76     
##  3     3 Aerolineas Argentinas              385803648 85_99      incide… 6      
##  4     4 Aeromexico*                        596871813 85_99      incide… 3      
##  5     5 Air Canada                        1865253802 85_99      incide… 2      
##  6     6 Air France                        3004002661 85_99      incide… 14     
##  7     7 Air India*                         869253552 85_99      incide… 2      
##  8     8 Air New Zealand*                   710174817 85_99      incide… 3      
##  9     9 Alaska Airlines*                   965346773 85_99      incide… 5      
## 10    10 Alitalia                           698012498 85_99      incide… 7      
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
airlines %>% mutate(n_events = n_events %>% str_replace_all("[0]", "-"))
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>   <chr>  
##  1    NA Aer Lingus                         320906734 85_99      incide… 2      
##  2     2 Aeroflot*                         1197672318 85_99      incide… 76     
##  3     3 Aerolineas Argentinas              385803648 85_99      incide… 6      
##  4     4 Aeromexico*                        596871813 85_99      incide… 3      
##  5     5 Air Canada                        1865253802 85_99      incide… 2      
##  6     6 Air France                        3004002661 85_99      incide… 14     
##  7     7 Air India*                         869253552 85_99      incide… 2      
##  8     8 Air New Zealand*                   710174817 85_99      incide… 3      
##  9     9 Alaska Airlines*                   965346773 85_99      incide… 5      
## 10    10 Alitalia                           698012498 85_99      incide… 7      
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events