# Import the airline data from the Excel workbook and store the event count
# and the weekly seat-kilometre column as numeric.
airlines <- read_excel("../00_data/MyData.xlsx") %>%
  mutate(
    n_events = as.numeric(n_events),
    avail_seat_km_per_week = as.numeric(avail_seat_km_per_week)
  )
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
# Print the tibble again; this repeats the preview shown after the import.
airlines
## # A tibble: 336 × 6
## Ref airline avail_seat_km_per_week year_range type_…¹ n_eve…²
## <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 NA Aer Lingus 320906734 85_99 incide… 2
## 2 2 Aeroflot* 1197672318 85_99 incide… 76
## 3 3 Aerolineas Argentinas 385803648 85_99 incide… 6
## 4 4 Aeromexico* 596871813 85_99 incide… 3
## 5 5 Air Canada 1865253802 85_99 incide… 2
## 6 6 Air France 3004002661 85_99 incide… 14
## 7 7 Air India* 869253552 85_99 incide… 2
## 8 8 Air New Zealand* 710174817 85_99 incide… 3
## 9 9 Alaska Airlines* 965346773 85_99 incide… 5
## 10 10 Alitalia 698012498 85_99 incide… 7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## # ²n_events
Make two bar charts here: one before ordering and another after.
# Transform data: mean of n_events within each type_of_event.
# Note: avg_incidents holds the mean for every event type (fatal accidents,
# fatalities and incidents), not only for incidents.
avg_airlines_events <- airlines %>%
  group_by(type_of_event) %>%
  summarise(avg_incidents = mean(n_events, na.rm = TRUE))

avg_airlines_events
## # A tibble: 3 × 2
## type_of_event avg_incidents
## <chr> <dbl>
## 1 fatal_accidents 1.42
## 2 fatalities 84.0
## 3 incidents 5.65
# Plot: mean events per event type, with the event types left unordered
ggplot(avg_airlines_events, aes(x = avg_incidents, y = type_of_event)) +
  geom_point()
# Plot: same chart, with the event types ordered by their mean value
avg_airlines_events %>%
  mutate(type_of_event = fct_reorder(type_of_event, avg_incidents)) %>%
  ggplot(aes(x = avg_incidents, y = type_of_event)) +
  geom_point() +
  # Labeling: drop the y-axis title, describe the x-axis
  labs(y = NULL, x = "Mean Incidents per event")
Show examples of three functions:
# List the distinct event types present in the data
distinct(airlines, type_of_event)
## # A tibble: 3 × 1
## type_of_event
## <chr>
## 1 incidents
## 2 fatal_accidents
## 3 fatalities
# Recode: rename the "incidents" level of type_of_event to "85_99".
# NOTE(review): the recoded factor is written to year_range, so it replaces
# the original year_range values inside this pipeline only (the result is
# printed, not assigned). Confirm that this is the intended demonstration.
airlines %>%
  select(type_of_event, year_range) %>%
  mutate(
    year_range = fct_recode(type_of_event, "85_99" = "incidents")
  ) %>%
  # Keep only the rows whose level was renamed
  filter(type_of_event == "incidents")
## # A tibble: 112 × 2
## type_of_event year_range
## <chr> <fct>
## 1 incidents 85_99
## 2 incidents 85_99
## 3 incidents 85_99
## 4 incidents 85_99
## 5 incidents 85_99
## 6 incidents 85_99
## 7 incidents 85_99
## 8 incidents 85_99
## 9 incidents 85_99
## 10 incidents 85_99
## # … with 102 more rows
# Collapse multiple levels into one: all three event types become "85_99".
# The former filter(type_of_event != "00_14") was removed: "00_14" is not one
# of the three event types, so the filter never dropped a row (the result
# keeps all 336 rows either way).
# NOTE(review): the collapsed factor is written to year_range, replacing the
# original values inside this pipeline only. Confirm that this is intended.
airlines %>%
  mutate(
    year_range = fct_collapse(
      type_of_event,
      "85_99" = c("incidents", "fatalities", "fatal_accidents")
    )
  ) %>%
  select(type_of_event, year_range)
## # A tibble: 336 × 2
## type_of_event year_range
## <chr> <fct>
## 1 incidents 85_99
## 2 incidents 85_99
## 3 incidents 85_99
## 4 incidents 85_99
## 5 incidents 85_99
## 6 incidents 85_99
## 7 incidents 85_99
## 8 incidents 85_99
## 9 incidents 85_99
## 10 incidents 85_99
## # … with 326 more rows
# Lump small levels into other levels
# First, check how many rows each event type has
count(airlines, type_of_event)
## # A tibble: 3 × 2
## type_of_event n
## <chr> <int>
## 1 fatal_accidents 112
## 2 fatalities 112
## 3 incidents 112
# fct_lump_lowfreq() is what fct_lump() runs when neither n nor prop is given.
# All three event types have the same count (112 rows each), so no level is
# lumped into "Other" here.
airlines %>%
  mutate(type_of_event = fct_lump_lowfreq(type_of_event)) %>%
  distinct(type_of_event)
## # A tibble: 3 × 1
## type_of_event
## <fct>
## 1 incidents
## 2 fatal_accidents
## 3 fatalities
No need to do anything here.