packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(robotstxt)
library(rvest)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
library(nycflights13)

Example slides 1.1

x_vec <- c(1,2,3,4)
x_vec > 2
## [1] FALSE FALSE  TRUE  TRUE
sum(x_vec>2)
## [1] 2

Assignment 1 Review

bike1 <- read.csv("Datasets/bike_sharing_data.csv")

# obs/variables

glimpse(bike1)
## Rows: 17,379
## Columns: 13
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/201…
## $ season     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ holiday    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ workingday <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ weather    <int> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3,…
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13.12…
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, 12.…
## $ humidity   <chr> "81", "80", "80", "75", "75", "75", "80", "86", "75", "76",…
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, 0.0…
## $ casual     <int> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41, 1…
## $ registered <int> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70, 5…
## $ count      <int> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, 110…
## $ sources    <chr> "ad campaign", "www.yahoo.com", "www.google.fi", "AD campai…

what data type is humidity

str(bike1)
## 'data.frame':    17379 obs. of  13 variables:
##  $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
##  $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
##  $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humidity  : chr  "81" "80" "80" "75" ...
##  $ windspeed : num  0 0 0 0 0 ...
##  $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
##  $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
##  $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
##  $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...

value of season in row 6251

bike1$season[6251]
## [1] 4

how many obs are winter

dim(subset(bike1,season==4))
## [1] 4232   13

# obs high wind during winter or spring

dim(subset(bike1, windspeed >= 40 & season %in% c(1,4)))
## [1] 46 13

Assignment 2 Review

get microsoft daily stock symbols

getSymbols("MSFT", src="yahoo",from = "2024-10-01", to = "2025-02-01")
## [1] "MSFT"

# of days

dim(MSFT)
## [1] 84  6

when was stock price highest

plot(MSFT$MSFT.Open)

Assignment 3 Review

is scraping allowed

paths_allowed("https://en.wikipedia.org/wiki/2024_in_film")
##  en.wikipedia.org
## [1] TRUE

scrape table data

link <- "https://en.wikipedia.org/wiki/2024_in_film"
page <- read_html(link)
tbls <- html_elements(page, "table")
highest_gross <- html_table(tbls[[2]])
highest_gross
## # A tibble: 10 × 4
##     Rank Title                           Distributor  `Worldwide gross`
##    <int> <chr>                           <chr>        <chr>            
##  1     1 Inside Out 2                    Disney       $1,698,863,816   
##  2     2 Deadpool & Wolverine            Disney       $1,338,073,645   
##  3     3 Moana 2                         Disney       $1,059,242,164   
##  4     4 Despicable Me 4                 Universal    $971,315,095     
##  5     5 Wicked                          Universal    $756,535,681     
##  6     6 Mufasa: The Lion King           Disney       $723,060,982     
##  7     7 Dune: Part Two                  Warner Bros. $715,409,065     
##  8     8 Godzilla x Kong: The New Empire Warner Bros. $572,505,338     
##  9     9 Kung Fu Panda 4                 Universal    $547,689,492     
## 10    10 Sonic the Hedgehog 3            Paramount    $492,162,604

Assignment 4

# missing values in Petal.Length

dirty_iris <- read.csv("https://raw.githubusercontent.com/edwindj/datacleaning/master/data/dirty_iris.csv")

sum(is.na(dirty_iris$Petal.Length))
## [1] 19

# and % of complete observations

num_complete <- sum(complete.cases(dirty_iris))
num_complete
## [1] 96
nrow(na.omit(dirty_iris))
## [1] 96
pct_complete <- num_complete / nrow(dirty_iris) * 100
pct_complete
## [1] 64

what type of special values in numeric columns

summary(dirty_iris)
##   Sepal.Length     Sepal.Width      Petal.Length    Petal.Width 
##  Min.   : 0.000   Min.   :-3.000   Min.   : 0.00   Min.   :0.1  
##  1st Qu.: 5.100   1st Qu.: 2.800   1st Qu.: 1.60   1st Qu.:0.3  
##  Median : 5.750   Median : 3.000   Median : 4.50   Median :1.3  
##  Mean   : 6.559   Mean   : 3.391   Mean   : 4.45   Mean   :Inf  
##  3rd Qu.: 6.400   3rd Qu.: 3.300   3rd Qu.: 5.10   3rd Qu.:1.8  
##  Max.   :73.000   Max.   :30.000   Max.   :63.00   Max.   :Inf  
##  NA's   :10       NA's   :17       NA's   :19      NA's   :12   
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

locate inf and replace with placeholder

inf_indicator <- is.infinite(dirty_iris$Petal.Width)
dirty_iris$Petal.Width[inf_indicator] <- NA
summary(dirty_iris)
##   Sepal.Length     Sepal.Width      Petal.Length    Petal.Width   
##  Min.   : 0.000   Min.   :-3.000   Min.   : 0.00   Min.   :0.100  
##  1st Qu.: 5.100   1st Qu.: 2.800   1st Qu.: 1.60   1st Qu.:0.300  
##  Median : 5.750   Median : 3.000   Median : 4.50   Median :1.300  
##  Mean   : 6.559   Mean   : 3.391   Mean   : 4.45   Mean   :1.207  
##  3rd Qu.: 6.400   3rd Qu.: 3.300   3rd Qu.: 5.10   3rd Qu.:1.800  
##  Max.   :73.000   Max.   :30.000   Max.   :63.00   Max.   :2.500  
##  NA's   :10       NA's   :17       NA's   :19      NA's   :13     
##    Species         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

find obs that violate rules

rules_violate <- subset(dirty_iris,c((Sepal.Width<=0)|(Sepal.Length>30)))
nrow(rules_violate)
## [1] 4

locate obs that violate and correct

neg_indicator <- which(dirty_iris$Sepal.Width<0)
dirty_iris$Sepal.Width[neg_indicator]<-abs(dirty_iris$Sepal.Width[neg_indicator])
dirty_iris[neg_indicator,]
##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 16            5           3          3.5           1 versicolor
zero_indicator <- which(dirty_iris$Sepal.Width == 0)
dirty_iris$Sepal.Width[zero_indicator] <- NA

Sepal.Width mean

iris_mean <- mean(dirty_iris$Sepal.Width, na.rm=TRUE) 
iris_mean
## [1] 3.462121

Petal.Length median

iris_median<- median(dirty_iris$Petal.Length, na.rm=TRUE)
iris_median
## [1] 4.5

Sepal.Length linear regression

model <- lm(Sepal.Length~Sepal.Width+Petal.Width, data=dirty_iris)
I <- is.na(dirty_iris$Sepal.Length)
to_be_imputed <- dirty_iris[I,]
dirty_iris$Sepal.Length[I] <- predict(model, newdata = to_be_imputed)

Petal.Width kNN

iris_nearest <- kNN(dirty_iris)
## Sepal.Length Petal.Length  Petal.Width Sepal.Length Petal.Length  Petal.Width 
##          0.0          0.0          0.1         73.0         63.0          2.5 
## Sepal.Length  Sepal.Width  Petal.Width Sepal.Length  Sepal.Width  Petal.Width 
##          0.0          2.2          0.1         73.0         30.0          2.5 
## Sepal.Length  Sepal.Width Petal.Length Sepal.Length  Sepal.Width Petal.Length 
##          0.0          2.2          0.0         73.0         30.0         63.0
#iris_nearest

Base R approach

extract from mtcars

data("mtcars")
mtcars[ ,c("mpg", "cyl")]
##                      mpg cyl
## Mazda RX4           21.0   6
## Mazda RX4 Wag       21.0   6
## Datsun 710          22.8   4
## Hornet 4 Drive      21.4   6
## Hornet Sportabout   18.7   8
## Valiant             18.1   6
## Duster 360          14.3   8
## Merc 240D           24.4   4
## Merc 230            22.8   4
## Merc 280            19.2   6
## Merc 280C           17.8   6
## Merc 450SE          16.4   8
## Merc 450SL          17.3   8
## Merc 450SLC         15.2   8
## Cadillac Fleetwood  10.4   8
## Lincoln Continental 10.4   8
## Chrysler Imperial   14.7   8
## Fiat 128            32.4   4
## Honda Civic         30.4   4
## Toyota Corolla      33.9   4
## Toyota Corona       21.5   4
## Dodge Challenger    15.5   8
## AMC Javelin         15.2   8
## Camaro Z28          13.3   8
## Pontiac Firebird    19.2   8
## Fiat X1-9           27.3   4
## Porsche 914-2       26.0   4
## Lotus Europa        30.4   4
## Ford Pantera L      15.8   8
## Ferrari Dino        19.7   6
## Maserati Bora       15.0   8
## Volvo 142E          21.4   4

extract columns w string s

mtcars[grep("s", names(mtcars))]
##                      disp  qsec vs
## Mazda RX4           160.0 16.46  0
## Mazda RX4 Wag       160.0 17.02  0
## Datsun 710          108.0 18.61  1
## Hornet 4 Drive      258.0 19.44  1
## Hornet Sportabout   360.0 17.02  0
## Valiant             225.0 20.22  1
## Duster 360          360.0 15.84  0
## Merc 240D           146.7 20.00  1
## Merc 230            140.8 22.90  1
## Merc 280            167.6 18.30  1
## Merc 280C           167.6 18.90  1
## Merc 450SE          275.8 17.40  0
## Merc 450SL          275.8 17.60  0
## Merc 450SLC         275.8 18.00  0
## Cadillac Fleetwood  472.0 17.98  0
## Lincoln Continental 460.0 17.82  0
## Chrysler Imperial   440.0 17.42  0
## Fiat 128             78.7 19.47  1
## Honda Civic          75.7 18.52  1
## Toyota Corolla       71.1 19.90  1
## Toyota Corona       120.1 20.01  1
## Dodge Challenger    318.0 16.87  0
## AMC Javelin         304.0 17.30  0
## Camaro Z28          350.0 15.41  0
## Pontiac Firebird    400.0 17.05  0
## Fiat X1-9            79.0 18.90  1
## Porsche 914-2       120.3 16.70  0
## Lotus Europa         95.1 16.90  1
## Ford Pantera L      351.0 14.50  0
## Ferrari Dino        145.0 15.50  0
## Maserati Bora       301.0 14.60  0
## Volvo 142E          121.0 18.60  1

add new column hpCyl

mtcars$hpCyl <- mtcars$hp + mtcars$cyl

extract rows on rule

subset(mtcars, (cyl<=6) & (hp>500))
##  [1] mpg   cyl   disp  hp    drat  wt    qsec  vs    am    gear  carb  hpCyl
## <0 rows> (or 0-length row.names)

summarize data in groups

cyl4 <- mtcars[which(mtcars$cyl==4),]
mean(cyl4$disp)
## [1] 105.1364
max(cyl4$hp)
## [1] 113

dplyr approach

extract from mtcars

select(mtcars, mpg, cyl)
##                      mpg cyl
## Mazda RX4           21.0   6
## Mazda RX4 Wag       21.0   6
## Datsun 710          22.8   4
## Hornet 4 Drive      21.4   6
## Hornet Sportabout   18.7   8
## Valiant             18.1   6
## Duster 360          14.3   8
## Merc 240D           24.4   4
## Merc 230            22.8   4
## Merc 280            19.2   6
## Merc 280C           17.8   6
## Merc 450SE          16.4   8
## Merc 450SL          17.3   8
## Merc 450SLC         15.2   8
## Cadillac Fleetwood  10.4   8
## Lincoln Continental 10.4   8
## Chrysler Imperial   14.7   8
## Fiat 128            32.4   4
## Honda Civic         30.4   4
## Toyota Corolla      33.9   4
## Toyota Corona       21.5   4
## Dodge Challenger    15.5   8
## AMC Javelin         15.2   8
## Camaro Z28          13.3   8
## Pontiac Firebird    19.2   8
## Fiat X1-9           27.3   4
## Porsche 914-2       26.0   4
## Lotus Europa        30.4   4
## Ford Pantera L      15.8   8
## Ferrari Dino        19.7   6
## Maserati Bora       15.0   8
## Volvo 142E          21.4   4

extract columns w string s

mtcars %>% 
  select(contains("s"))
##                      disp  qsec vs
## Mazda RX4           160.0 16.46  0
## Mazda RX4 Wag       160.0 17.02  0
## Datsun 710          108.0 18.61  1
## Hornet 4 Drive      258.0 19.44  1
## Hornet Sportabout   360.0 17.02  0
## Valiant             225.0 20.22  1
## Duster 360          360.0 15.84  0
## Merc 240D           146.7 20.00  1
## Merc 230            140.8 22.90  1
## Merc 280            167.6 18.30  1
## Merc 280C           167.6 18.90  1
## Merc 450SE          275.8 17.40  0
## Merc 450SL          275.8 17.60  0
## Merc 450SLC         275.8 18.00  0
## Cadillac Fleetwood  472.0 17.98  0
## Lincoln Continental 460.0 17.82  0
## Chrysler Imperial   440.0 17.42  0
## Fiat 128             78.7 19.47  1
## Honda Civic          75.7 18.52  1
## Toyota Corolla       71.1 19.90  1
## Toyota Corona       120.1 20.01  1
## Dodge Challenger    318.0 16.87  0
## AMC Javelin         304.0 17.30  0
## Camaro Z28          350.0 15.41  0
## Pontiac Firebird    400.0 17.05  0
## Fiat X1-9            79.0 18.90  1
## Porsche 914-2       120.3 16.70  0
## Lotus Europa         95.1 16.90  1
## Ford Pantera L      351.0 14.50  0
## Ferrari Dino        145.0 15.50  0
## Maserati Bora       301.0 14.60  0
## Volvo 142E          121.0 18.60  1

add new column hpCyl

mtcars %>%
  mutate(hpCyl = hp + cyl)
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb hpCyl
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4   116
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4   116
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1    97
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1   116
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   183
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1   111
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4   253
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2    66
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2    99
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4   129
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4   129
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3   188
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3   188
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3   188
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4   213
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4   223
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   238
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1    70
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2    56
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1    69
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1   101
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2   158
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2   158
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4   253
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2   183
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1    70
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2    95
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2   117
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4   272
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6   181
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8   343
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2   113

extract rows on rule

filter(mtcars, cyl <= 6, hp>500)
##  [1] mpg   cyl   disp  hp    drat  wt    qsec  vs    am    gear  carb  hpCyl
## <0 rows> (or 0-length row.names)

summarize data in groups

stats_cyl <- mtcars %>%
  group_by(cyl) %>%
  summarize(mean(disp), max(hp))
#stats_cyl

Assignment 5

check code

not_cancelled <- flights %>%
 filter(!is.na(dep_delay))

not_cancelled %>% 
 group_by(year, month, day)
## # A tibble: 328,521 × 19
## # Groups:   year, month, day [365]
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 328,511 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

tailnum lowest avg arrival delay

not_cancelled %>% 
group_by(tailnum) %>% 
 summarize(
  delay = mean(arr_delay)) %>%
 arrange(delay)
## # A tibble: 4,037 × 2
##    tailnum delay
##    <chr>   <dbl>
##  1 N560AS  -53  
##  2 N315AS  -51  
##  3 N517AS  -40.5
##  4 N7AYAA  -35  
##  5 N585AS  -34.5
##  6 N915DN  -34  
##  7 N512AS  -31.5
##  8 N594AS  -31.3
##  9 N564AS  -31  
## 10 N593AS  -30.5
## # ℹ 4,027 more rows

departure delays more than 60 min

flights %>%
  group_by(month) %>%
  summarize(
    total_flights = n(),
    delayed_over_60 = sum(dep_delay > 60, na.rm = TRUE),
    prop_delayed = delayed_over_60 / total_flights
  ) %>%
  print()
## # A tibble: 12 × 4
##    month total_flights delayed_over_60 prop_delayed
##    <int>         <int>           <int>        <dbl>
##  1     1         27004            1821       0.0674
##  2     2         24951            1654       0.0663
##  3     3         28834            2340       0.0812
##  4     4         28330            2535       0.0895
##  5     5         28796            2309       0.0802
##  6     6         28243            3494       0.124 
##  7     7         29425            3820       0.130 
##  8     8         29327            2295       0.0783
##  9     9         27574            1330       0.0482
## 10    10         28889            1344       0.0465
## 11    11         27268            1086       0.0398
## 12    12         28135            2553       0.0907

destinations with most carriers

not_cancelled %>% 
 group_by(dest) %>% 
 summarize(carriers = n_distinct(carrier)) %>% 
 arrange(desc(carriers))
## # A tibble: 104 × 2
##    dest  carriers
##    <chr>    <int>
##  1 ATL          7
##  2 BOS          7
##  3 CLT          7
##  4 ORD          7
##  5 TPA          7
##  6 AUS          6
##  7 DCA          6
##  8 DTW          6
##  9 IAD          6
## 10 MSP          6
## # ℹ 94 more rows