Evan Klein / D590 / Fall 2025

library(hflights)
# Per-column summary of the data set; for numeric columns the output also
# reports how many values are NA.
summary(hflights)
##       Year          Month          DayofMonth      DayOfWeek        DepTime    
##  Min.   :2011   Min.   : 1.000   Min.   : 1.00   Min.   :1.000   Min.   :   1  
##  1st Qu.:2011   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:2.000   1st Qu.:1021  
##  Median :2011   Median : 7.000   Median :16.00   Median :4.000   Median :1416  
##  Mean   :2011   Mean   : 6.514   Mean   :15.74   Mean   :3.948   Mean   :1396  
##  3rd Qu.:2011   3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:6.000   3rd Qu.:1801  
##  Max.   :2011   Max.   :12.000   Max.   :31.00   Max.   :7.000   Max.   :2400  
##                                                                  NA's   :2905  
##     ArrTime     UniqueCarrier        FlightNum      TailNum         
##  Min.   :   1   Length:227496      Min.   :   1   Length:227496     
##  1st Qu.:1215   Class :character   1st Qu.: 855   Class :character  
##  Median :1617   Mode  :character   Median :1696   Mode  :character  
##  Mean   :1578                      Mean   :1962                     
##  3rd Qu.:1953                      3rd Qu.:2755                     
##  Max.   :2400                      Max.   :7290                     
##  NA's   :3066                                                       
##  ActualElapsedTime    AirTime         ArrDelay          DepDelay      
##  Min.   : 34.0     Min.   : 11.0   Min.   :-70.000   Min.   :-33.000  
##  1st Qu.: 77.0     1st Qu.: 58.0   1st Qu.: -8.000   1st Qu.: -3.000  
##  Median :128.0     Median :107.0   Median :  0.000   Median :  0.000  
##  Mean   :129.3     Mean   :108.1   Mean   :  7.094   Mean   :  9.445  
##  3rd Qu.:165.0     3rd Qu.:141.0   3rd Qu.: 11.000   3rd Qu.:  9.000  
##  Max.   :575.0     Max.   :549.0   Max.   :978.000   Max.   :981.000  
##  NA's   :3622      NA's   :3622    NA's   :3622      NA's   :2905     
##     Origin              Dest              Distance          TaxiIn       
##  Length:227496      Length:227496      Min.   :  79.0   Min.   :  1.000  
##  Class :character   Class :character   1st Qu.: 376.0   1st Qu.:  4.000  
##  Mode  :character   Mode  :character   Median : 809.0   Median :  5.000  
##                                        Mean   : 787.8   Mean   :  6.099  
##                                        3rd Qu.:1042.0   3rd Qu.:  7.000  
##                                        Max.   :3904.0   Max.   :165.000  
##                                                         NA's   :3066     
##     TaxiOut         Cancelled       CancellationCode      Diverted       
##  Min.   :  1.00   Min.   :0.00000   Length:227496      Min.   :0.000000  
##  1st Qu.: 10.00   1st Qu.:0.00000   Class :character   1st Qu.:0.000000  
##  Median : 14.00   Median :0.00000   Mode  :character   Median :0.000000  
##  Mean   : 15.09   Mean   :0.01307                      Mean   :0.002853  
##  3rd Qu.: 18.00   3rd Qu.:0.00000                      3rd Qu.:0.000000  
##  Max.   :163.00   Max.   :1.00000                      Max.   :1.000000  
##  NA's   :2947
# Total number of missing cells. is.na() already returns logicals, so the
# result can be summed directly.
sum(is.na(hflights))
## [1] 25755
# Number of incomplete (FALSE) and complete (TRUE) rows.
table(complete.cases(hflights))
## 
##  FALSE   TRUE 
##   3622 223874
# The same split expressed as percentages of all rows.
prop.table(table(complete.cases(hflights))) * 100
## 
##     FALSE      TRUE 
##  1.592116 98.407884
# Missing values per column, in ascending order. vapply() pins the result to
# one integer per column.
sort(vapply(hflights, function(x) sum(is.na(x)), integer(1)))
##              Year             Month        DayofMonth         DayOfWeek 
##                 0                 0                 0                 0 
##     UniqueCarrier         FlightNum           TailNum            Origin 
##                 0                 0                 0                 0 
##              Dest          Distance         Cancelled  CancellationCode 
##                 0                 0                 0                 0 
##          Diverted           DepTime          DepDelay           TaxiOut 
##                 0              2905              2905              2947 
##           ArrTime            TaxiIn ActualElapsedTime           AirTime 
##              3066              3066              3622              3622 
##          ArrDelay 
##              3622

Initial visualizations

library(visdat)
# Visual overview of the whole data frame (column classes and missing cells).
# warn_large_data = FALSE is passed explicitly because the data has 227,496
# rows.
vis_dat(hflights, warn_large_data = FALSE)

# Missingness-only view of the same data.
vis_miss(hflights, warn_large_data = FALSE)

It looks like we have a small amount of missing data in a handful of columns. Additionally, the missingness does not seem to be random: there appear to be blocks of rows in our dataset where data is missing across multiple columns at once. The columns with missing data are:

# Count missing values per column (always one integer per column), then keep
# the names of the columns that have at least one.
vals <- vapply(hflights, function(x) sum(is.na(x)), integer(1))
names(vals[vals > 0])
## [1] "DepTime"           "ArrTime"           "ActualElapsedTime"
## [4] "AirTime"           "ArrDelay"          "DepDelay"         
## [7] "TaxiIn"            "TaxiOut"
library(naniar)
library(ggplot2)

# Arrival time against departure time, drawn with geom_miss_point() so that
# rows with a missing coordinate are still displayed; one panel per month.
ggplot(data = hflights, mapping = aes(DepTime, ArrTime)) +
  geom_miss_point() +
  facet_wrap(vars(Month)) +
  theme_dark()

# Number of missing values in each variable.
gg_miss_var(x = hflights)

# The same plot, split into one panel per month.
gg_miss_var(x = hflights, facet = Month)

# Shadow matrix: one <column>_NA factor per original column, marking each
# cell as NA or !NA.
as_shadow(hflights)
## # A tibble: 227,496 × 21
##    Year_NA Month_NA DayofMonth_NA DayOfWeek_NA DepTime_NA ArrTime_NA
##    <fct>   <fct>    <fct>         <fct>        <fct>      <fct>     
##  1 !NA     !NA      !NA           !NA          !NA        !NA       
##  2 !NA     !NA      !NA           !NA          !NA        !NA       
##  3 !NA     !NA      !NA           !NA          !NA        !NA       
##  4 !NA     !NA      !NA           !NA          !NA        !NA       
##  5 !NA     !NA      !NA           !NA          !NA        !NA       
##  6 !NA     !NA      !NA           !NA          !NA        !NA       
##  7 !NA     !NA      !NA           !NA          !NA        !NA       
##  8 !NA     !NA      !NA           !NA          !NA        !NA       
##  9 !NA     !NA      !NA           !NA          !NA        !NA       
## 10 !NA     !NA      !NA           !NA          !NA        !NA       
## # ℹ 227,486 more rows
## # ℹ 15 more variables: UniqueCarrier_NA <fct>, FlightNum_NA <fct>,
## #   TailNum_NA <fct>, ActualElapsedTime_NA <fct>, AirTime_NA <fct>,
## #   ArrDelay_NA <fct>, DepDelay_NA <fct>, Origin_NA <fct>, Dest_NA <fct>,
## #   Distance_NA <fct>, TaxiIn_NA <fct>, TaxiOut_NA <fct>, Cancelled_NA <fct>,
## #   CancellationCode_NA <fct>, Diverted_NA <fct>
# Attach the shadow columns to the original data (21 data columns plus 21
# <column>_NA columns). nabular() is run on the same input so the two results
# can be compared further down.
hflights_shadow <- bind_shadow(hflights)
hflights_nab <- nabular(hflights)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Column-by-column preview of the bind_shadow() result.
glimpse(hflights_shadow)
## Rows: 227,496
## Columns: 42
## $ Year                 <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2…
## $ Month                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15…
## $ DayOfWeek            <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1…
## $ DepTime              <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1…
## $ ArrTime              <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1…
## $ UniqueCarrier        <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "…
## $ FlightNum            <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428,…
## $ TailNum              <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA",…
## $ ActualElapsedTime    <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 6…
## $ AirTime              <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 4…
## $ ArrDelay             <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -…
## $ DepDelay             <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -…
## $ Origin               <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", …
## $ Dest                 <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", …
## $ Distance             <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224,…
## $ TaxiIn               <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12,…
## $ TaxiOut              <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13,…
## $ Cancelled            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode     <chr> "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ Diverted             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Year_NA              <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Month_NA             <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DayofMonth_NA        <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DayOfWeek_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DepTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ArrTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ UniqueCarrier_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ FlightNum_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TailNum_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ActualElapsedTime_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ AirTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ArrDelay_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DepDelay_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Origin_NA            <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Dest_NA              <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Distance_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TaxiIn_NA            <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TaxiOut_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Cancelled_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ CancellationCode_NA  <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Diverted_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
# Column-by-column preview of the nabular() result, for comparison with the
# bind_shadow() result.
glimpse(hflights_nab)
## Rows: 227,496
## Columns: 42
## $ Year                 <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2…
## $ Month                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15…
## $ DayOfWeek            <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1…
## $ DepTime              <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1…
## $ ArrTime              <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1…
## $ UniqueCarrier        <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "…
## $ FlightNum            <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428,…
## $ TailNum              <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA",…
## $ ActualElapsedTime    <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 6…
## $ AirTime              <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 4…
## $ ArrDelay             <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -…
## $ DepDelay             <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -…
## $ Origin               <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", …
## $ Dest                 <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", …
## $ Distance             <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224,…
## $ TaxiIn               <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12,…
## $ TaxiOut              <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13,…
## $ Cancelled            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode     <chr> "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ Diverted             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Year_NA              <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Month_NA             <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DayofMonth_NA        <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DayOfWeek_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DepTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ArrTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ UniqueCarrier_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ FlightNum_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TailNum_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ActualElapsedTime_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ AirTime_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ ArrDelay_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ DepDelay_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Origin_NA            <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Dest_NA              <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Distance_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TaxiIn_NA            <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ TaxiOut_NA           <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Cancelled_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ CancellationCode_NA  <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
## $ Diverted_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA,…
# Check that bind_shadow() and nabular() produced equal objects.
all.equal(hflights_shadow, hflights_nab)
## [1] TRUE
# Compare the distribution of Month between flights whose ArrDelay is present
# (!NA) and flights whose ArrDelay is missing (NA).
# Written as an explicit summarise() instead of the superseded summarise_at();
# the output columns keep the names mean, sd, var, min and max.
hflights %>%
  bind_shadow() %>%
  group_by(ArrDelay_NA) %>%
  summarise(
    mean = mean(Month, na.rm = TRUE),
    sd = sd(Month, na.rm = TRUE),
    var = var(Month, na.rm = TRUE),
    min = min(Month, na.rm = TRUE),
    max = max(Month, na.rm = TRUE)
  )
## # A tibble: 2 × 6
##   ArrDelay_NA  mean    sd   var   min   max
##   <fct>       <dbl> <dbl> <dbl> <int> <int>
## 1 !NA          6.54  3.41  11.7     1    12
## 2 NA           4.96  3.32  11.0     1    12
# Density of flight distance, one curve per ArrDelay missingness status.
hflights_shadow %>%
  ggplot(mapping = aes(Distance, colour = ArrDelay_NA)) +
  geom_density()

# Histogram of flight distance, filled by DepDelay missingness status.
ggplot(bind_shadow(hflights), aes(Distance, fill = DepDelay_NA)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(simputation)
## 
## Attaching package: 'simputation'
## The following object is masked from 'package:naniar':
## 
##     impute_median
library(dplyr)

# Impute missing DepDelay values with a linear model on Month and plot the
# result. Year is not used as a predictor: every row is from 2011, so it is
# constant and would only add an aliased term to the model.
hflights %>%
  impute_lm(DepDelay ~ Month) %>%
  ggplot(aes(x = Month, y = DepDelay)) +
  geom_point()

# Same imputation on the shadow-augmented data, so that points can be coloured
# by whether DepDelay was originally missing. The data is converted to a plain
# data frame before being handed to impute_lm().
hflights_shadow %>%
  as.data.frame() %>%
  impute_lm(DepDelay ~ Month) %>%
  ggplot(aes(x = Month, y = DepDelay, colour = DepDelay_NA)) +
  geom_point()

# Naniar summaries
# Distinct rows in the data set (equal to the row count here).
dplyr::n_distinct(hflights)
## [1] 227496
# Year takes a single distinct value.
dplyr::n_distinct(hflights$Year)
## [1] 1
# Total number of missing cells.
n_miss(hflights)
## [1] 25755
# Total number of non-missing cells.
n_complete(hflights)
## [1] 4751661
# Non-missing values in Year alone (every row).
n_complete(hflights$Year)
## [1] 227496
# Proportion of rows (cases) that contain at least one missing value.
prop_miss_case(hflights)
## [1] 0.01592116
# Missing-value count and percentage for each individual row.
miss_case_summary(hflights)
## # A tibble: 227,496 × 3
##     case n_miss pct_miss
##    <int>  <int>    <dbl>
##  1   195      8     38.1
##  2   211      8     38.1
##  3   324      8     38.1
##  4   336      8     38.1
##  5   348      8     38.1
##  6   416      8     38.1
##  7   425      8     38.1
##  8   535      8     38.1
##  9   804      8     38.1
## 10   952      8     38.1
## # ℹ 227,486 more rows
# Tabulate rows by the number of missing values they contain.
miss_case_table(hflights)
## # A tibble: 5 × 3
##   n_miss_in_case n_cases pct_cases
##            <int>   <int>     <dbl>
## 1              0  223874   98.4   
## 2              3     556    0.244 
## 3              5     119    0.0523
## 4              6      42    0.0185
## 5              8    2905    1.28
# Tabulate variables by the number of missing values they contain.
miss_var_table(hflights)
## # A tibble: 5 × 3
##   n_miss_in_var n_vars pct_vars
##           <int>  <int>    <dbl>
## 1             0     13    61.9 
## 2          2905      2     9.52
## 3          2947      1     4.76
## 4          3066      2     9.52
## 5          3622      3    14.3
# Runs of consecutive missing / complete values in Month. Month has no NAs,
# so a single complete run is expected.
miss_var_run(hflights, Month)
## # A tibble: 1 × 2
##   run_length is_na   
##        <int> <chr>   
## 1     227496 complete
# Missingness of DepDelay within consecutive spans of 10,000 rows.
miss_var_span(hflights, DepDelay, span_every = 10000)
## # A tibble: 23 × 6
##    span_counter n_miss n_complete prop_miss prop_complete n_in_span
##           <int>  <int>      <int>     <dbl>         <dbl>     <int>
##  1            1    133       9867    0.0133         0.987     10000
##  2            2     82       9918    0.0082         0.992     10000
##  3            3    525       9475    0.0525         0.948     10000
##  4            4    567       9433    0.0567         0.943     10000
##  5            5     83       9917    0.0083         0.992     10000
##  6            6     87       9913    0.0087         0.991     10000
##  7            7    116       9884    0.0116         0.988     10000
##  8            8    131       9869    0.0131         0.987     10000
##  9            9    166       9834    0.0166         0.983     10000
## 10           10     96       9904    0.0096         0.990     10000
## # ℹ 13 more rows
# Missing-value count and percentage for every variable.
miss_var_summary(hflights)
## # A tibble: 21 × 3
##    variable          n_miss pct_miss
##    <chr>              <int>    <num>
##  1 ActualElapsedTime   3622     1.59
##  2 AirTime             3622     1.59
##  3 ArrDelay            3622     1.59
##  4 ArrTime             3066     1.35
##  5 TaxiIn              3066     1.35
##  6 TaxiOut             2947     1.30
##  7 DepTime             2905     1.28
##  8 DepDelay            2905     1.28
##  9 Year                   0     0   
## 10 Month                  0     0   
## # ℹ 11 more rows
# Per-variable missingness summary, computed separately for each month.
miss_var_summary(group_by(hflights, Month))
## # A tibble: 240 × 4
## # Groups:   Month [12]
##    Month variable          n_miss pct_miss
##    <int> <chr>              <int>    <num>
##  1     1 ActualElapsedTime    245     1.30
##  2     1 AirTime              245     1.30
##  3     1 ArrDelay             245     1.30
##  4     1 ArrTime              219     1.16
##  5     1 TaxiIn               219     1.16
##  6     1 TaxiOut              206     1.09
##  7     1 DepTime              201     1.06
##  8     1 DepDelay             201     1.06
##  9     1 Year                   0     0   
## 10     1 DayofMonth             0     0   
## # ℹ 230 more rows
# Append each row's proportion of missing values (prop_miss_all) and show the
# first rows.
head(add_prop_miss(hflights))
##      Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## 5424 2011     1          1         6    1400    1500            AA       428
## 5425 2011     1          2         7    1401    1501            AA       428
## 5426 2011     1          3         1    1352    1502            AA       428
## 5427 2011     1          4         2    1403    1513            AA       428
## 5428 2011     1          5         3    1405    1507            AA       428
## 5429 2011     1          6         4    1359    1503            AA       428
##      TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance
## 5424  N576AA                60      40      -10        0    IAH  DFW      224
## 5425  N557AA                60      45       -9        1    IAH  DFW      224
## 5426  N541AA                70      48       -8       -8    IAH  DFW      224
## 5427  N403AA                70      39        3        3    IAH  DFW      224
## 5428  N492AA                62      44       -3        5    IAH  DFW      224
## 5429  N262AA                64      45       -7       -1    IAH  DFW      224
##      TaxiIn TaxiOut Cancelled CancellationCode Diverted prop_miss_all
## 5424      7      13         0                         0             0
## 5425      6       9         0                         0             0
## 5426      5      17         0                         0             0
## 5427      9      22         0                         0             0
## 5428      9       9         0                         0             0
## 5429      6      13         0                         0             0
library(rpart)
library(rpart.plot)

# Fit a regression tree that predicts each row's proportion of missing values
# (prop_miss_all) from all other columns, then plot it.
# model = TRUE stores the model frame in the fit. Because the data reaches
# rpart() through the pipe as `.`, prp() otherwise cannot retrieve the data
# used to build the model and emits a warning.
hflights %>%
  add_prop_miss() %>%
  rpart(prop_miss_all ~ ., data = ., model = TRUE) %>%
  prp(type = 4, extra = 101, prefix = "Prop. Miss = ")