1. Join + Filter

Which airplanes fly LGA to XNA (1 POINT)

Q1 <- left_join(planes, flights, by = 'tailnum') %>%
  filter(origin == 'LGA' & dest == 'XNA') %>%
  select(manufacturer, model, engine, origin, dest) %>%
  distinct()

print(Q1)
## # A tibble: 4 × 5
##   manufacturer         model       engine        origin dest 
##   <chr>                <chr>       <chr>         <chr>  <chr>
## 1 GULFSTREAM AEROSPACE G1159B      Turbo-jet     LGA    XNA  
## 2 BOMBARDIER INC       CL-600-2C10 Turbo-fan     LGA    XNA  
## 3 CESSNA               172N        Reciprocating LGA    XNA  
## 4 CANADAIR LTD         CF-5D       Turbo-jet     LGA    XNA

2. Join

Add the airline name to the flights table (1 POINT)

Q2 <- right_join(airlines, flights, by = 'carrier') %>%
  select(name, carrier, everything()) %>%
  arrange(time_hour)

print(Q2)
## # A tibble: 336,776 × 20
##    name     carrier  year month   day dep_time sched_dep_time dep_delay arr_time
##    <chr>    <chr>   <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1 America… AA       2013     1     1      542            540         2      923
##  2 JetBlue… B6       2013     1     1      544            545        -1     1004
##  3 JetBlue… B6       2013     1     1      559            559         0      702
##  4 United … UA       2013     1     1      517            515         2      830
##  5 United … UA       2013     1     1      533            529         4      850
##  6 United … UA       2013     1     1      554            558        -4      740
##  7 America… AA       2013     1     1      558            600        -2      753
##  8 America… AA       2013     1     1      559            600        -1      941
##  9 America… AA       2013     1     1      606            610        -4      858
## 10 America… AA       2013     1     1      623            610        13      920
## # ℹ 336,766 more rows
## # ℹ 11 more variables: sched_arr_time <int>, arr_delay <dbl>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

3. Join + Select + Distinct

Which airports have no commercial flights (1 POINT)

Q3 <- anti_join(airports, flights, by = c('faa' = 'origin')) %>%
  anti_join(flights, by = c('faa' = 'dest')) %>%
  select(faa, name, tzone) %>%
  distinct()

print(Q3)
## # A tibble: 1,355 × 3
##    faa   name                           tzone              
##    <chr> <chr>                          <chr>              
##  1 04G   Lansdowne Airport              America/New_York   
##  2 06A   Moton Field Municipal Airport  America/Chicago    
##  3 06C   Schaumburg Regional            America/Chicago    
##  4 06N   Randall Airport                America/New_York   
##  5 09J   Jekyll Island Airport          America/New_York   
##  6 0A9   Elizabethton Municipal Airport America/New_York   
##  7 0G6   Williams County Airport        America/New_York   
##  8 0G7   Finger Lakes Regional Airport  America/New_York   
##  9 0P2   Shoestring Aviation Airfield   America/New_York   
## 10 0S9   Jefferson County Intl          America/Los_Angeles
## # ℹ 1,345 more rows

4. EXTRA CREDIT - (2 POINTS)

Create a table with the names of the airports with the most winds (wind_speed > 30). The table must contain only the airport name (airports$name) and no duplicate rows

high_wind_weather <- weather %>%
  filter(wind_speed > 30)

high_wind_airports <- high_wind_weather %>%
  left_join(airports, by = c("origin" = "faa")) %>%
  select(name) %>%
  distinct()

print(high_wind_airports)
## # A tibble: 3 × 1
##   name               
##   <chr>              
## 1 Newark Liberty Intl
## 2 John F Kennedy Intl
## 3 La Guardia