library(nycflights13)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

1. join + filter - Which airplanes fly LGA to XNA (1 POINT)

# One row per LGA -> XNA flight, annotated with plane details and airline name.
# The plane's build year is renamed to `plane_year` before joining, so it never
# collides with the flight `year` column (which would otherwise yield `year.y`).
# Left joins keep flights whose tail number is absent from `planes`; their
# plane columns are NA.
flights_lga_xna <- flights %>%
  filter(origin == "LGA" & dest == "XNA") %>%
  left_join(
    select(planes, tailnum, manufacturer, model, plane_year = year),
    by = "tailnum"
  ) %>%
  left_join(airlines, by = "carrier") %>%
  select(tailnum, manufacturer, model, plane_year, name, origin, dest, flight)

flights_lga_xna
## # A tibble: 745 × 8
##    tailnum manufacturer         model  plane_year name      origin dest  flight
##    <chr>   <chr>                <chr>       <int> <chr>     <chr>  <chr>  <int>
##  1 N722MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4534
##  2 N719MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4525
##  3 N739MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4413
##  4 N719MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4534
##  5 N711MQ  GULFSTREAM AEROSPACE G1159B       1976 Envoy Air LGA    XNA     4525
##  6 N723MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4413
##  7 N711MQ  GULFSTREAM AEROSPACE G1159B       1976 Envoy Air LGA    XNA     4534
##  8 N730MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4525
##  9 N722MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4413
## 10 N719MQ  <NA>                 <NA>           NA Envoy Air LGA    XNA     4534
## # ℹ 735 more rows

2. join - Add the airline name to the flights table (1 POINT)

# Append the airline's full name (`name`) to every flight by matching on the
# carrier code. A left join keeps every row of `flights`.
flights_with_airline <- left_join(flights, airlines, by = join_by(carrier))

flights_with_airline
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, name <chr>

3. join + select + distinct() - Which airports have no commercial flights (1 POINT)

# Step 1: every airport code that appears in `flights`, as either an origin or
# a destination. `names_to = NULL` discards the origin/dest label so only the
# codes remain, stacked into a single `faa` column.
active_airports <- flights %>%
  select(origin, dest) %>%
  pivot_longer(everything(), names_to = NULL, values_to = "faa") %>%
  distinct()

# Step 2: airports whose code never shows up in the flight records.
no_commercial_flights <- airports %>%
  filter(!faa %in% active_airports$faa) %>%
  select(faa, name, lat, lon, alt, tz, dst, tzone)

# View result
no_commercial_flights
## # A tibble: 1,355 × 8
##    faa   name                             lat    lon   alt    tz dst   tzone    
##    <chr> <chr>                          <dbl>  <dbl> <dbl> <dbl> <chr> <chr>    
##  1 04G   Lansdowne Airport               41.1  -80.6  1044    -5 A     America/…
##  2 06A   Moton Field Municipal Airport   32.5  -85.7   264    -6 A     America/…
##  3 06C   Schaumburg Regional             42.0  -88.1   801    -6 A     America/…
##  4 06N   Randall Airport                 41.4  -74.4   523    -5 A     America/…
##  5 09J   Jekyll Island Airport           31.1  -81.4    11    -5 A     America/…
##  6 0A9   Elizabethton Municipal Airport  36.4  -82.2  1593    -5 A     America/…
##  7 0G6   Williams County Airport         41.5  -84.5   730    -5 A     America/…
##  8 0G7   Finger Lakes Regional Airport   42.9  -76.8   492    -5 A     America/…
##  9 0P2   Shoestring Aviation Airfield    39.8  -76.6  1000    -5 U     America/…
## 10 0S9   Jefferson County Intl           48.1 -123.    108    -8 A     America/…
## # ℹ 1,345 more rows

4. EXTRA CREDIT - (2 POINTS) - NO HELP - NO PARTIAL CREDIT

# Names of the airports that have at least one weather record with
# wind_speed above 30.
windy_airports <- weather %>%
  filter(wind_speed > 30) %>%
  # one row per airport code that had such a record
  distinct(origin) %>%
  # look up the airport name for each code
  left_join(airports, by = join_by(origin == faa)) %>%
  # keep only the unique airport names
  distinct(name)

windy_airports
## # A tibble: 3 × 1
##   name               
##   <chr>              
## 1 Newark Liberty Intl
## 2 John F Kennedy Intl
## 3 La Guardia