# excel file
airlines <- read_excel("../00_data/MyData.xlsx") %>%
mutate(n_events = as.numeric(n_events)) %>%
mutate(avail_seat_km_per_week = as.numeric(avail_seat_km_per_week))
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
filter(airlines, type_of_event == "fatal_accidents")
## # A tibble: 112 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 57 Aer Lingus 320906734 85_99 fatal_… 0
## 2 58 Aeroflot* 1197672318 85_99 fatal_… 14
## 3 59 Aerolineas Argentinas 385803648 85_99 fatal_… 0
## 4 60 Aeromexico* 596871813 85_99 fatal_… 1
## 5 61 Air Canada 1865253802 85_99 fatal_… 0
## 6 62 Air France 3004002661 85_99 fatal_… 4
## 7 63 Air India* 869253552 85_99 fatal_… 1
## 8 64 Air New Zealand* 710174817 85_99 fatal_… 0
## 9 65 Alaska Airlines* 965346773 85_99 fatal_… 0
## 10 66 Alitalia 698012498 85_99 fatal_… 2
## # … with 102 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
arrange(airlines, desc(airline))
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_of_event n_eve…¹
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 56 Xiamen Airlines 430462962 85_99 incidents 9
## 2 112 Xiamen Airlines 430462962 85_99 fatal_accide… 1
## 3 168 Xiamen Airlines 430462962 85_99 fatalities 82
## 4 224 Xiamen Airlines 430462962 00_14 incidents 2
## 5 280 Xiamen Airlines 430462962 00_14 fatal_accide… 0
## 6 336 Xiamen Airlines 430462962 00_14 fatalities 0
## 7 55 Virgin Atlantic 1005248585 85_99 incidents 1
## 8 111 Virgin Atlantic 1005248585 85_99 fatal_accide… 0
## 9 167 Virgin Atlantic 1005248585 85_99 fatalities 0
## 10 223 Virgin Atlantic 1005248585 00_14 incidents 0
## # … with 326 more rows, and abbreviated variable name ¹n_events
select(airlines, airline, n_events)
## # A tibble: 336 × 2
## airline n_events
## <chr> <dbl>
## 1 Aer Lingus 2
## 2 Aeroflot* 76
## 3 Aerolineas Argentinas 6
## 4 Aeromexico* 3
## 5 Air Canada 2
## 6 Air France 14
## 7 Air India* 2
## 8 Air New Zealand* 3
## 9 Alaska Airlines* 5
## 10 Alitalia 7
## # … with 326 more rows
select(airlines, airline, n_events, year_range, type_of_event)
## # A tibble: 336 × 4
## airline n_events year_range type_of_event
## <chr> <dbl> <chr> <chr>
## 1 Aer Lingus 2 85_99 incidents
## 2 Aeroflot* 76 85_99 incidents
## 3 Aerolineas Argentinas 6 85_99 incidents
## 4 Aeromexico* 3 85_99 incidents
## 5 Air Canada 2 85_99 incidents
## 6 Air France 14 85_99 incidents
## 7 Air India* 2 85_99 incidents
## 8 Air New Zealand* 3 85_99 incidents
## 9 Alaska Airlines* 5 85_99 incidents
## 10 Alitalia 7 85_99 incidents
## # … with 326 more rows
mutate(airlines,
events_per_km = avail_seat_km_per_week / n_events)
## # A tibble: 336 × 7
## Ref airline avail_seat_km_p…¹ year_…² type_…³ n_eve…⁴ event…⁵
## <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2 1.60e8
## 2 2 Aeroflot* 1197672318 85_99 incide… 76 1.58e7
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6 6.43e7
## 4 4 Aeromexico* 596871813 85_99 incide… 3 1.99e8
## 5 5 Air Canada 1865253802 85_99 incide… 2 9.33e8
## 6 6 Air France 3004002661 85_99 incide… 14 2.15e8
## 7 7 Air India* 869253552 85_99 incide… 2 4.35e8
## 8 8 Air New Zealand* 710174817 85_99 incide… 3 2.37e8
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5 1.93e8
## 10 10 Alitalia 698012498 85_99 incide… 7 9.97e7
## # … with 326 more rows, and abbreviated variable names ¹avail_seat_km_per_week,
## # ²year_range, ³type_of_event, ⁴n_events, ⁵events_per_km
# lag() lagging values in Reference column of data
select(airlines, Ref) %>%
mutate(Ref_lag1 = lag(Ref))
## # A tibble: 336 × 2
## Ref Ref_lag1
## <dbl> <dbl>
## 1 NA NA
## 2 2 NA
## 3 3 2
## 4 4 3
## 5 5 4
## 6 6 5
## 7 7 6
## 8 8 7
## 9 9 8
## 10 10 9
## # … with 326 more rows
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
# average departure delay
summarise(airlines, event = mean(n_events, na.rm = TRUE))
## # A tibble: 1 × 1
## event
## <dbl>
## 1 30.3
Summarize by group
airlines %>%
# Group by airlines
group_by(airline) %>%
# Calculate average number of events
summarise(event = mean(n_events, na.rm = TRUE)) %>%
# Sort it
arrange(event)
## # A tibble: 56 × 2
## airline event
## <chr> <dbl>
## 1 TAP - Air Portugal 0
## 2 Hawaiian Airlines 0.167
## 3 Finnair 0.167
## 4 Virgin Atlantic 0.167
## 5 Cathay Pacific* 0.333
## 6 Aer Lingus 0.333
## 7 Austrian Airlines 0.333
## 8 Air Canada 0.667
## 9 Qantas* 1
## 10 Aerolineas Argentinas 1.17
## # … with 46 more rows
```