Import your data
# excel file
airlines <- read_excel("../00_data/MyData.xlsx") %>%
mutate(n_events = as.numeric(n_events)) %>%
mutate(avail_seat_km_per_week = as.numeric(avail_seat_km_per_week))
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
Pivoting
airlines %>%
pivot_longer(cols = c(`avail_seat_km_per_week`, `n_events`),
names_to = "km",
values_to = "events")
## # A tibble: 672 × 6
## Ref airline year_range type_of_event km events
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 NA Aer Lingus 85_99 incidents avail_seat_km_pe… 3.21e8
## 2 NA Aer Lingus 85_99 incidents n_events 2 e0
## 3 2 Aeroflot* 85_99 incidents avail_seat_km_pe… 1.20e9
## 4 2 Aeroflot* 85_99 incidents n_events 7.6 e1
## 5 3 Aerolineas Argentinas 85_99 incidents avail_seat_km_pe… 3.86e8
## 6 3 Aerolineas Argentinas 85_99 incidents n_events 6 e0
## 7 4 Aeromexico* 85_99 incidents avail_seat_km_pe… 5.97e8
## 8 4 Aeromexico* 85_99 incidents n_events 3 e0
## 9 5 Air Canada 85_99 incidents avail_seat_km_pe… 1.87e9
## 10 5 Air Canada 85_99 incidents n_events 2 e0
## # … with 662 more rows
long to wide form
wide to long form
airlines %>%
pivot_wider(names_from = n_events,
values_from = avail_seat_km_per_week)
## # A tibble: 336 × 71
## Ref airline year_…¹ type_…² `2` `76` `6` `3` `14` `5`
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA Aer Li… 85_99 incide… 3.21e8 NA NA NA NA NA
## 2 2 Aerofl… 85_99 incide… NA 1.20e9 NA NA NA NA
## 3 3 Aeroli… 85_99 incide… NA NA 3.86e8 NA NA NA
## 4 4 Aerome… 85_99 incide… NA NA NA 5.97e8 NA NA
## 5 5 Air Ca… 85_99 incide… 1.87e9 NA NA NA NA NA
## 6 6 Air Fr… 85_99 incide… NA NA NA NA 3.00e9 NA
## 7 7 Air In… 85_99 incide… 8.69e8 NA NA NA NA NA
## 8 8 Air Ne… 85_99 incide… NA NA NA 7.10e8 NA NA
## 9 9 Alaska… 85_99 incide… NA NA NA NA NA 9.65e8
## 10 10 Alital… 85_99 incide… NA NA NA NA NA NA
## # … with 326 more rows, 61 more variables: `7` <dbl>, `21` <dbl>, `1` <dbl>,
## # `4` <dbl>, `0` <dbl>, `12` <dbl>, `24` <dbl>, `8` <dbl>, `25` <dbl>,
## # `10` <dbl>, `19` <dbl>, `16` <dbl>, `9` <dbl>, `128` <dbl>, `64` <dbl>,
## # `79` <dbl>, `329` <dbl>, `50` <dbl>, `101` <dbl>, `323` <dbl>, `535` <dbl>,
## # `47` <dbl>, `407` <dbl>, `282` <dbl>, `167` <dbl>, `260` <dbl>,
## # `148` <dbl>, `520` <dbl>, `425` <dbl>, `34` <dbl>, `234` <dbl>, `74` <dbl>,
## # `51` <dbl>, `313` <dbl>, `159` <dbl>, `229` <dbl>, `98` <dbl>, …
Separating and Uniting
Unite two columns
airlines_united <- airlines%>%
unite(col = "totalevents", n_events:type_of_event, sep = "/")
airlines_united
## # A tibble: 336 × 5
## Ref airline avail_seat_km_per_week year_range totalevents
## <dbl> <chr> <dbl> <chr> <chr>
## 1 NA Aer Lingus 320906734 85_99 2/incidents
## 2 2 Aeroflot* 1197672318 85_99 76/incidents
## 3 3 Aerolineas Argentinas 385803648 85_99 6/incidents
## 4 4 Aeromexico* 596871813 85_99 3/incidents
## 5 5 Air Canada 1865253802 85_99 2/incidents
## 6 6 Air France 3004002661 85_99 14/incidents
## 7 7 Air India* 869253552 85_99 2/incidents
## 8 8 Air New Zealand* 710174817 85_99 3/incidents
## 9 9 Alaska Airlines* 965346773 85_99 5/incidents
## 10 10 Alitalia 698012498 85_99 7/incidents
## # … with 326 more rows
Separate a column
airlines_united %>%
separate(col = totalevents,
into = c("n_events", "type_of_event"),
sep = "/")
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range n_eve…¹ type_…²
## <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 NA Aer Lingus 320906734 85_99 2 incide…
## 2 2 Aeroflot* 1197672318 85_99 76 incide…
## 3 3 Aerolineas Argentinas 385803648 85_99 6 incide…
## 4 4 Aeromexico* 596871813 85_99 3 incide…
## 5 5 Air Canada 1865253802 85_99 2 incide…
## 6 6 Air France 3004002661 85_99 14 incide…
## 7 7 Air India* 869253552 85_99 2 incide…
## 8 8 Air New Zealand* 710174817 85_99 3 incide…
## 9 9 Alaska Airlines* 965346773 85_99 5 incide…
## 10 10 Alitalia 698012498 85_99 7 incide…
## # … with 326 more rows, and abbreviated variable names ¹n_events,
## # ²type_of_event
Missing Values
airlines <- tibble(
type_of_event = c("incident","fatalities"),
year_range = c("85_99", "00_14"),
n_events = c(76, 143)
)