Import data

# excel file
airlines <- read_excel("../00_data/MyData.xlsx") %>%
     mutate(n_events = as.numeric(n_events)) %>%
    mutate(avail_seat_km_per_week = as.numeric(avail_seat_km_per_week))
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events

Apply the following dplyr verbs to your data

Filter rows

filter(airlines, type_of_event == "fatal_accidents")
## # A tibble: 112 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    57 Aer Lingus                         320906734 85_99      fatal_…       0
##  2    58 Aeroflot*                         1197672318 85_99      fatal_…      14
##  3    59 Aerolineas Argentinas              385803648 85_99      fatal_…       0
##  4    60 Aeromexico*                        596871813 85_99      fatal_…       1
##  5    61 Air Canada                        1865253802 85_99      fatal_…       0
##  6    62 Air France                        3004002661 85_99      fatal_…       4
##  7    63 Air India*                         869253552 85_99      fatal_…       1
##  8    64 Air New Zealand*                   710174817 85_99      fatal_…       0
##  9    65 Alaska Airlines*                   965346773 85_99      fatal_…       0
## 10    66 Alitalia                           698012498 85_99      fatal_…       2
## # … with 102 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events

Arrange rows

arrange(airlines, desc(airline))
## # A tibble: 336 × 6
##      Ref airline         avail_seat_km_per_week year_range type_of_event n_eve…¹
##    <dbl> <chr>                            <dbl> <chr>      <chr>           <dbl>
##  1    56 Xiamen Airlines              430462962 85_99      incidents           9
##  2   112 Xiamen Airlines              430462962 85_99      fatal_accide…       1
##  3   168 Xiamen Airlines              430462962 85_99      fatalities         82
##  4   224 Xiamen Airlines              430462962 00_14      incidents           2
##  5   280 Xiamen Airlines              430462962 00_14      fatal_accide…       0
##  6   336 Xiamen Airlines              430462962 00_14      fatalities          0
##  7    55 Virgin Atlantic             1005248585 85_99      incidents           1
##  8   111 Virgin Atlantic             1005248585 85_99      fatal_accide…       0
##  9   167 Virgin Atlantic             1005248585 85_99      fatalities          0
## 10   223 Virgin Atlantic             1005248585 00_14      incidents           0
## # … with 326 more rows, and abbreviated variable name ¹​n_events

Select columns

select(airlines, airline, n_events)
## # A tibble: 336 × 2
##    airline               n_events
##    <chr>                    <dbl>
##  1 Aer Lingus                   2
##  2 Aeroflot*                   76
##  3 Aerolineas Argentinas        6
##  4 Aeromexico*                  3
##  5 Air Canada                   2
##  6 Air France                  14
##  7 Air India*                   2
##  8 Air New Zealand*             3
##  9 Alaska Airlines*             5
## 10 Alitalia                     7
## # … with 326 more rows
select(airlines, airline, n_events, year_range, type_of_event)
## # A tibble: 336 × 4
##    airline               n_events year_range type_of_event
##    <chr>                    <dbl> <chr>      <chr>        
##  1 Aer Lingus                   2 85_99      incidents    
##  2 Aeroflot*                   76 85_99      incidents    
##  3 Aerolineas Argentinas        6 85_99      incidents    
##  4 Aeromexico*                  3 85_99      incidents    
##  5 Air Canada                   2 85_99      incidents    
##  6 Air France                  14 85_99      incidents    
##  7 Air India*                   2 85_99      incidents    
##  8 Air New Zealand*             3 85_99      incidents    
##  9 Alaska Airlines*             5 85_99      incidents    
## 10 Alitalia                     7 85_99      incidents    
## # … with 326 more rows

Add columns

mutate(airlines,
       events_per_km = avail_seat_km_per_week / n_events) 
## # A tibble: 336 × 7
##      Ref airline               avail_seat_km_p…¹ year_…² type_…³ n_eve…⁴ event…⁵
##    <dbl> <chr>                             <dbl> <chr>   <chr>     <dbl>   <dbl>
##  1    NA Aer Lingus                    320906734 85_99   incide…       2  1.60e8
##  2     2 Aeroflot*                    1197672318 85_99   incide…      76  1.58e7
##  3     3 Aerolineas Argentinas         385803648 85_99   incide…       6  6.43e7
##  4     4 Aeromexico*                   596871813 85_99   incide…       3  1.99e8
##  5     5 Air Canada                   1865253802 85_99   incide…       2  9.33e8
##  6     6 Air France                   3004002661 85_99   incide…      14  2.15e8
##  7     7 Air India*                    869253552 85_99   incide…       2  4.35e8
##  8     8 Air New Zealand*              710174817 85_99   incide…       3  2.37e8
##  9     9 Alaska Airlines*              965346773 85_99   incide…       5  1.93e8
## 10    10 Alitalia                      698012498 85_99   incide…       7  9.97e7
## # … with 326 more rows, and abbreviated variable names ¹​avail_seat_km_per_week,
## #   ²​year_range, ³​type_of_event, ⁴​n_events, ⁵​events_per_km
# lag() lagging values in Reference column of data
select(airlines, Ref) %>%
    
    mutate(Ref_lag1 = lag(Ref))
## # A tibble: 336 × 2
##      Ref Ref_lag1
##    <dbl>    <dbl>
##  1    NA       NA
##  2     2       NA
##  3     3        2
##  4     4        3
##  5     5        4
##  6     6        5
##  7     7        6
##  8     8        7
##  9     9        8
## 10    10        9
## # … with 326 more rows

Summarize by groups

airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
# average departure delay
summarise(airlines, event = mean(n_events, na.rm = TRUE))
## # A tibble: 1 × 1
##   event
##   <dbl>
## 1  30.3

Summarize by group

airlines %>%
    
    # Group by airlines
    group_by(airline) %>%
    
    # Calculate average number of events
    summarise(event = mean(n_events, na.rm = TRUE)) %>%
    
    
    # Sort it
    arrange(event)
## # A tibble: 56 × 2
##    airline               event
##    <chr>                 <dbl>
##  1 TAP - Air Portugal    0    
##  2 Hawaiian Airlines     0.167
##  3 Finnair               0.167
##  4 Virgin Atlantic       0.167
##  5 Cathay Pacific*       0.333
##  6 Aer Lingus            0.333
##  7 Austrian Airlines     0.333
##  8 Air Canada            0.667
##  9 Qantas*               1    
## 10 Aerolineas Argentinas 1.17 
## # … with 46 more rows

```