Import your data

# excel file
airlines <- read_excel("../00_data/MyData.xlsx") %>%
     mutate(n_events = as.numeric(n_events)) %>%
    mutate(avail_seat_km_per_week = as.numeric(avail_seat_km_per_week))
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events

Chapter 13

What are primary keys in your data?

airline, avail_seat_km_per_week, n_events, type_of_event

Can you divide your data into two?

Divide it using dplyr::select in a way the two have a common variable, which you could use to join the two.

airlines_1<- airlines %>% select(Ref:year_range)
airlines_1
## # A tibble: 336 × 4
##      Ref airline               avail_seat_km_per_week year_range
##    <dbl> <chr>                                  <dbl> <chr>     
##  1    NA Aer Lingus                         320906734 85_99     
##  2     2 Aeroflot*                         1197672318 85_99     
##  3     3 Aerolineas Argentinas              385803648 85_99     
##  4     4 Aeromexico*                        596871813 85_99     
##  5     5 Air Canada                        1865253802 85_99     
##  6     6 Air France                        3004002661 85_99     
##  7     7 Air India*                         869253552 85_99     
##  8     8 Air New Zealand*                   710174817 85_99     
##  9     9 Alaska Airlines*                   965346773 85_99     
## 10    10 Alitalia                           698012498 85_99     
## # … with 326 more rows
airlines_2 <- airlines %>% select(Ref, avail_seat_km_per_week, type_of_event:n_events)
airlines_2
## # A tibble: 336 × 4
##      Ref avail_seat_km_per_week type_of_event n_events
##    <dbl>                  <dbl> <chr>            <dbl>
##  1    NA              320906734 incidents            2
##  2     2             1197672318 incidents           76
##  3     3              385803648 incidents            6
##  4     4              596871813 incidents            3
##  5     5             1865253802 incidents            2
##  6     6             3004002661 incidents           14
##  7     7              869253552 incidents            2
##  8     8              710174817 incidents            3
##  9     9              965346773 incidents            5
## 10    10              698012498 incidents            7
## # … with 326 more rows

Alternative

airlines %>%
    mutate(row_id = row_number())
## # A tibble: 336 × 7
##      Ref airline               avail_seat_km_pe…¹ year_…² type_…³ n_eve…⁴ row_id
##    <dbl> <chr>                              <dbl> <chr>   <chr>     <dbl>  <int>
##  1    NA Aer Lingus                     320906734 85_99   incide…       2      1
##  2     2 Aeroflot*                     1197672318 85_99   incide…      76      2
##  3     3 Aerolineas Argentinas          385803648 85_99   incide…       6      3
##  4     4 Aeromexico*                    596871813 85_99   incide…       3      4
##  5     5 Air Canada                    1865253802 85_99   incide…       2      5
##  6     6 Air France                    3004002661 85_99   incide…      14      6
##  7     7 Air India*                     869253552 85_99   incide…       2      7
##  8     8 Air New Zealand*               710174817 85_99   incide…       3      8
##  9     9 Alaska Airlines*               965346773 85_99   incide…       5      9
## 10    10 Alitalia                       698012498 85_99   incide…       7     10
## # … with 326 more rows, and abbreviated variable names ¹​avail_seat_km_per_week,
## #   ²​year_range, ³​type_of_event, ⁴​n_events

Can you join the two together?

Use tidyr::left_join or other joining functions.

airlines_1 %>%
    left_join(airlines_2)
## Joining with `by = join_by(Ref, avail_seat_km_per_week)`
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events