# Read the airline workbook and coerce the n_events and
# avail_seat_km_per_week columns to numeric in a single step, then show
# the resulting tibble.
airlines <- read_excel("../00_data/MyData.xlsx") %>%
  mutate(
    n_events = as.numeric(n_events),
    avail_seat_km_per_week = as.numeric(avail_seat_km_per_week)
  )
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
# Display the airlines tibble again; nothing has modified it since it was
# created, so the printout matches the previous one.
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
# Count the rows whose year_range ends with "85_99". The summary column is
# deliberately left unnamed, so dplyr labels it with the expression text.
summarise(airlines, sum(str_detect(year_range, "85_99$")))
## # A tibble: 1 × 1
## `sum(str_detect(year_range, "85_99$"))`
## <int>
## 1 168
# Row-by-row logical vector: TRUE where year_range ends with "85_99".
str_detect(string = airlines[["year_range"]], pattern = "85_99$")
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Count the rows whose year_range ends with "85_99" (each TRUE adds 1).
airlines %>%
  pull(year_range) %>%
  str_detect("85_99$") %>%
  sum()
## [1] 168
# Airline names of interest. The carrier is spelled "Aer Lingus" in the
# printed airline column, so it is written that way here; the previous
# "Air Lingus" spelling could never match that row.
airlines_1 <- c(
  "Air Canada", "Air France", "Condor",
  "Korean Air", "Aer Lingus", "Southwest Airlines"
)
# Collapse the names into one alternation pattern ("A|B|C") that matches
# any of them.
airline_match <- str_c(airlines_1, collapse = "|")
airline_match
## [1] "Air Canada|Air France|Condor|Korean Air|Aer Lingus|Southwest Airlines"
# For each name, return "Air France" where that text occurs and NA otherwise.
airlines_1 %>% str_extract("Air France")
## [1] NA "Air France" NA NA NA
## [6] NA
# Swap a leading "0" in n_events for "-". str_replace() works on text, so
# the column comes back as character. The result is printed, not saved.
mutate(airlines, n_events = str_replace(n_events, "^[0]", "-"))
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
# Swap every "0" character in n_events for "-", not only a leading one, so
# a value such as 10 would become "1-". The column comes back as character,
# and the result is printed, not saved.
mutate(airlines, n_events = str_replace_all(n_events, "[0]", "-"))
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events