Load the libraries and view the “flights” dataset

library(tidyverse)
library(nycflights13)
library(psych)
# Print per-column summary statistics for `flights` (quartiles, means, and
# NA counts for numeric columns; length/class for character columns).
summary(flights)
##       year          month             day           dep_time    sched_dep_time
##  Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
##  1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
##  Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
##  Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
##  3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
##  Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
##                                                  NA's   :8255                 
##    dep_delay          arr_time    sched_arr_time   arr_delay       
##  Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000  
##  1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000  
##  Median :  -2.00   Median :1535   Median :1556   Median :  -5.000  
##  Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895  
##  3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000  
##  Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000  
##  NA's   :8255      NA's   :8713                  NA's   :9430      
##    carrier              flight       tailnum             origin         
##  Length:336776      Min.   :   1   Length:336776      Length:336776     
##  Class :character   1st Qu.: 553   Class :character   Class :character  
##  Mode  :character   Median :1496   Mode  :character   Mode  :character  
##                     Mean   :1972                                        
##                     3rd Qu.:3465                                        
##                     Max.   :8500                                        
##                                                                         
##      dest              air_time        distance         hour      
##  Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00  
##  Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00  
##  Mode  :character   Median :129.0   Median : 872   Median :13.00  
##                     Mean   :150.7   Mean   :1040   Mean   :13.18  
##                     3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00  
##                     Max.   :695.0   Max.   :4983   Max.   :23.00  
##                     NA's   :9430                                  
##      minute        time_hour                  
##  Min.   : 0.00   Min.   :2013-01-01 05:00:00  
##  1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00  
##  Median :29.00   Median :2013-07-03 10:00:00  
##  Mean   :26.23   Mean   :2013-07-03 05:22:54  
##  3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00  
##  Max.   :59.00   Max.   :2013-12-31 23:00:00  
## 

Prepare dataframe for plotting

library(tidyverse)

# Rank every carrier by total distance flown, largest first; the first ten
# rows of the result are the "top 10" airlines.
flights %>%
  group_by(carrier) %>%
  summarise(total = sum(distance)) %>%
  arrange(desc(total))
## # A tibble: 16 x 2
##    carrier    total
##    <chr>      <dbl>
##  1 UA      89705524
##  2 DL      59507317
##  3 B6      58384137
##  4 AA      43864584
##  5 EV      30498951
##  6 MQ      15033955
##  7 VX      12902327
##  8 WN      12229203
##  9 US      11365778
## 10 9E       9788152
## 11 FL       2167344
## 12 AS       1715028
## 13 HA       1704186
## 14 F9       1109700
## 15 YV        225395
## 16 OO         16026
# The ten carriers with the greatest total distance flown (first ten rows of
# the ranking above).
top_carriers <- c("UA", "DL", "B6", "AA", "EV", "MQ", "VX", "WN", "US", "9E")

# Monthly distance flown per top-10 carrier.
# alluvial_ts() needs three columns in this order: category, time variable,
# value.
#
# `%in%` tests set membership. Comparing with `==` against a length-10 vector
# recycles that vector down the rows, which silently keeps only about one in
# ten of the matching flights and raises a "longer object length" warning.
#
# NOTE: any printed output that follows was generated before this fix;
# re-knit the document to refresh it.
my_df <- flights %>%
  filter(carrier %in% top_carriers) %>%
  group_by(month, carrier) %>%
  summarize(total_dist = sum(distance)) %>%
  ungroup() %>% # drop the leftover `month` grouping before handing off
  select(carrier, month, total_dist)

# Print the table, dropping any rows that contain missing values.
my_df %>% drop_na()
## # A tibble: 120 x 3
## # Groups:   month [12]
##    carrier month total_dist
##    <chr>   <int>      <dbl>
##  1 9E          1      78685
##  2 AA          1     342364
##  3 B6          1     502188
##  4 DL          1     462127
##  5 EV          1     241320
##  6 MQ          1     126603
##  7 UA          1     676284
##  8 US          1      76927
##  9 VX          1      75272
## 10 WN          1     104466
## # … with 110 more rows

Which airlines flew the most miles in 2013?

#library(plotly)
library(alluvial)
## Warning: package 'alluvial' was built under R version 4.0.2
# Draw the alluvial time-series plot of `my_df` (carrier, month, total
# distance). `FALSE` is spelled out because `F` is an ordinary variable that
# can be reassigned.
alluvial_ts(
  my_df,
  wave = .3,
  ygap = 5,
  grid = TRUE,
  xlab = "Month",
  ylab = "Distance Traveled",
  border = NA,
  axis.cex = .8,
  leg.mode = FALSE,
  leg.max = 250000,
  leg.y = .96,
  leg.cex = .7,
  title = "Airline Miles Traveled\nTop 10 in 2013"
)