library(nycflights13)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

1. join + filter - Which airplanes fly LGA to XNA (1 POINT)

# Q1: keep only flights on the LGA -> XNA route, collapse them to the unique
# (date, carrier, flight number, tail number) combinations, then attach the
# aircraft details from `planes` by tail number. Both tables carry a `year`
# column, so the join output disambiguates them as `year.x` / `year.y`.
q1_lga_to_xna_airplanes <- flights %>%
  filter(origin == "LGA" & dest == "XNA") %>%
  distinct(year, month, day, carrier, flight, tailnum) %>%
  left_join(planes, by = join_by(tailnum)) %>%
  arrange(tailnum)

# Print the result.
q1_lga_to_xna_airplanes
## # A tibble: 745 × 14
##    year.x month   day carrier flight tailnum year.y type  manufacturer model
##     <int> <int> <int> <chr>    <int> <chr>    <int> <chr> <chr>        <chr>
##  1   2013    12     4 MQ        3553 N0EGMQ      NA <NA>  <NA>         <NA> 
##  2   2013    12    18 MQ        3553 N501MQ      NA <NA>  <NA>         <NA> 
##  3   2013    12     2 MQ        3553 N507MQ      NA <NA>  <NA>         <NA> 
##  4   2013    12     3 MQ        3547 N510MQ      NA <NA>  <NA>         <NA> 
##  5   2013    12     3 MQ        3553 N510MQ      NA <NA>  <NA>         <NA> 
##  6   2013    12     4 MQ        3547 N510MQ      NA <NA>  <NA>         <NA> 
##  7   2013    12    30 MQ        3553 N510MQ      NA <NA>  <NA>         <NA> 
##  8   2013    12    22 MQ        3553 N511MQ      NA <NA>  <NA>         <NA> 
##  9   2013    12    23 MQ        3547 N511MQ      NA <NA>  <NA>         <NA> 
## 10   2013    12    23 MQ        3553 N511MQ      NA <NA>  <NA>         <NA> 
## # ℹ 735 more rows
## # ℹ 4 more variables: engines <int>, seats <int>, speed <int>, engine <chr>

2. join - Add the airline name to the flights table (1 POINT)

# Q2: attach each carrier's full name to every flight. The lookup column is
# renamed to `airline_name` on the `airlines` side before the join, so the
# joined table needs no further renaming.
q2_flights_with_airline_name <- flights %>%
  left_join(
    airlines %>% rename(airline_name = name),
    by = join_by(carrier)
  )

# Print the result.
q2_flights_with_airline_name
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, airline_name <chr>

3. join + select + distinct() - Which airports have no commercial flights (1 POINT)

# Every airport code that appears in `flights`, whether as an origin or as a
# destination, stacked into a single `faa` column with duplicates removed.
# `names_to = NULL` discards the origin/dest label column while pivoting.
used_airports <- flights %>%
  select(origin, dest) %>%
  pivot_longer(everything(), names_to = NULL, values_to = "faa") %>%
  distinct(faa)

# Q3: airports from `airports` whose FAA code never shows up in
# `used_airports`, reduced to code + name and sorted by code.
q3_airports_with_no_flights <- airports %>%
  filter(!(faa %in% used_airports$faa)) %>%
  distinct(faa, name) %>%
  arrange(faa)

# Print the result.
q3_airports_with_no_flights
## # A tibble: 1,355 × 2
##    faa   name                          
##    <chr> <chr>                         
##  1 04G   Lansdowne Airport             
##  2 06A   Moton Field Municipal Airport 
##  3 06C   Schaumburg Regional           
##  4 06N   Randall Airport               
##  5 09J   Jekyll Island Airport         
##  6 0A9   Elizabethton Municipal Airport
##  7 0G6   Williams County Airport       
##  8 0G7   Finger Lakes Regional Airport 
##  9 0P2   Shoestring Aviation Airfield  
## 10 0S9   Jefferson County Intl         
## # ℹ 1,345 more rows

4. EXTRA CREDIT - (2 POINTS) - NO HELP - NO PARTIAL CREDIT

# Q4: count, per origin, the weather observations with wind_speed above 30,
# keep the origin(s) with the largest count, and report the matching
# airport name(s) looked up in `airports`.
q4_airports_most_high_winds <- weather %>%
  filter(wind_speed > 30) %>%
  group_by(origin) %>%
  summarise(high_wind_obs = n()) %>%
  filter(high_wind_obs == max(high_wind_obs)) %>%
  left_join(select(airports, faa, name), by = join_by(origin == faa)) %>%
  distinct(name)

# Print the result.
q4_airports_most_high_winds
## # A tibble: 1 × 1
##   name               
##   <chr>              
## 1 John F Kennedy Intl