library(nycflights13)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Write code that creates a table for each of the questions below.

  1. join + filter - Which airplanes fly LGA to XNA (1 POINT)
# Tail numbers (and operating carrier) of the planes that flew LGA -> XNA.
# The route filter runs first so the join only touches the matching flights;
# the inner join then keeps only tail numbers that have a record in `planes`.
q1 <- flights %>%
  filter(origin == "LGA", dest == "XNA") %>%
  inner_join(planes, by = "tailnum") %>%
  # one row per aircraft; .keep_all retains `carrier` for the final select()
  distinct(tailnum, .keep_all = TRUE) %>%
  select(tailnum, carrier)
print(q1)
## # A tibble: 4 × 2
##   tailnum carrier
##   <chr>   <chr>  
## 1 N711MQ  MQ     
## 2 N737MQ  MQ     
## 3 N840MQ  MQ     
## 4 N713EV  EV

Airplanes N711MQ, N737MQ, N840MQ, and N713EV all fly from LGA to XNA.

  1. join - Add the airline name to the flights table (1 POINT)
# Add the airline's full name (`name` from `airlines`) to every flight.
# left_join() keeps every row of `flights`: a carrier code missing from
# `airlines` would appear with an NA name instead of the flight being dropped.
q2 <- flights %>%
  left_join(airlines, by = "carrier")
print(q2)
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, name <chr>
  1. join + select + distinct() - Which airports have no commercial flights (1 POINT)
# Airport codes that appear in `flights` as a destination.
dest <- flights %>%
  select(airport = dest) %>%
  distinct()
# Airport codes that appear in `flights` as an origin.
origin <- flights %>%
  select(airport = origin) %>%
  distinct()

# Every airport code listed in `airports`, under the shared column name.
all_air <- airports %>%
  select(airport = faa)

# Airports with at least one flight in either direction.
q3 <- distinct(bind_rows(dest, origin))

# Airports listed in `airports` that never appear in `flights`.
no_comm <- anti_join(all_air, q3, by = "airport")
print(no_comm)
## # A tibble: 1,355 × 1
##    airport
##    <chr>  
##  1 04G    
##  2 06A    
##  3 06C    
##  4 06N    
##  5 09J    
##  6 0A9    
##  7 0G6    
##  8 0G7    
##  9 0P2    
## 10 0S9    
## # ℹ 1,345 more rows
  1. EXTRA CREDIT - (2 POINTS) - NO HELP - NO PARTIAL CREDIT Create a table with the names of the airports with the most winds (wind_speed > 30). The table must contain only the airport name (airports$name) and no duplicate rows
# Weather observations whose wind_speed exceeds 30.
ex <- filter(weather, wind_speed > 30)

# Match each observation's `origin` to the airport's `faa` code, then keep
# each airport name once (distinct(name) also drops every other column).
high_wind <- inner_join(ex, airports, by = join_by(origin == faa)) %>%
  distinct(name)

print(high_wind)
## # A tibble: 3 × 1
##   name               
##   <chr>              
## 1 Newark Liberty Intl
## 2 John F Kennedy Intl
## 3 La Guardia