Import your data

# Read the Excel workbook and coerce both measure columns to numeric
# in one mutate() call, then print the resulting tibble.
airlines <- read_excel("../00_data/MyData.xlsx") %>%
    mutate(
        n_events               = as.numeric(n_events),
        avail_seat_km_per_week = as.numeric(avail_seat_km_per_week)
    )
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events
# Print the tibble a second time (same preview as the one above)
airlines
## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹​type_of_event,
## #   ²​n_events

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering and another after.

# Transform data: mean of n_events for each type of event
# (missing n_events values are ignored)
avg_airlines_events <- group_by(airlines, type_of_event) %>%
    summarise(avg_incidents = mean(n_events, na.rm = TRUE))
avg_airlines_events
## # A tibble: 3 × 2
##   type_of_event   avg_incidents
##   <chr>                   <dbl>
## 1 fatal_accidents          1.42
## 2 fatalities              84.0 
## 3 incidents                5.65
# Plot: one point per event type, before any reordering of the y axis
ggplot(avg_airlines_events, aes(x = avg_incidents, y = type_of_event)) +
    geom_point()

# Same plot, with the event types ordered by their average
ggplot(
    avg_airlines_events,
    aes(
        x = avg_incidents,
        y = fct_reorder(.f = type_of_event, .x = avg_incidents)
    )
) +
    geom_point() +
    # Labeling: drop the y-axis title, name the x axis
    labs(x = "Mean Incidents per event", y = NULL)

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump
# List the distinct event types present in the data
distinct(airlines, type_of_event)
## # A tibble: 3 × 1
##   type_of_event  
##   <chr>          
## 1 incidents      
## 2 fatal_accidents
## 3 fatalities
# Recode: give one level of type_of_event a more readable label.
# The result is stored in a new column (event_label) so that the real
# year_range column is not overwritten with event-type values.
airlines %>%

    # Rename levels: new label on the left, existing level on the right
    mutate(event_label = fct_recode(type_of_event,
                                    "Incidents" = "incidents")) %>%
    select(type_of_event, event_label) %>%
    # Keep only the rows whose level was renamed, to show the effect
    filter(type_of_event == "incidents")
## # A tibble: 112 × 2
##    type_of_event event_label
##    <chr>         <fct>      
##  1 incidents     Incidents  
##  2 incidents     Incidents  
##  3 incidents     Incidents  
##  4 incidents     Incidents  
##  5 incidents     Incidents  
##  6 incidents     Incidents  
##  7 incidents     Incidents  
##  8 incidents     Incidents  
##  9 incidents     Incidents  
## 10 incidents     Incidents  
## # … with 102 more rows
# Collapse multiple levels into one: the two fatal event types are merged
# into a single "fatal" level, while "incidents" is left as it is.
# The result is stored in a new column (event_group) so that year_range
# keeps its real values.
airlines %>%

    mutate(event_group = fct_collapse(type_of_event,
                                      "fatal" = c("fatalities", "fatal_accidents"))) %>%
    # Show each original level next to the level it was collapsed into
    distinct(type_of_event, event_group)
## # A tibble: 3 × 2
##   type_of_event   event_group
##   <chr>           <fct>      
## 1 incidents       incidents  
## 2 fatal_accidents fatal      
## 3 fatalities      fatal      
# Lump small levels into other levels
# First, check how many rows each event type has
count(airlines, type_of_event)
## # A tibble: 3 × 2
##   type_of_event       n
##   <chr>           <int>
## 1 fatal_accidents   112
## 2 fatalities        112
## 3 incidents         112
# Apply fct_lump() with its default settings and list the resulting levels.
# All three event types have the same number of rows (112 each), and the
# output below shows that no level is lumped into another.
airlines %>%
    mutate(type_of_event = fct_lump(type_of_event)) %>%
    distinct(type_of_event)
## # A tibble: 3 × 1
##   type_of_event  
##   <fct>          
## 1 incidents      
## 2 fatal_accidents
## 3 fatalities

Chapter 16

No need to do anything here.