library(nycflights13)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#Question 1

# Write code that creates a table for each of the questions below.

# 1. join + filter - Which airplanes fly LGA to XNA (1 POINT)

# Question 1: flights from LGA to XNA, annotated with aircraft details.

# Keep only the flights on the LGA -> XNA route.
flights_lga_xna <- flights %>%
  filter(origin == "LGA", dest == "XNA")

# Attach aircraft metadata from `planes`. A left join keeps every flight, so
# tail numbers without a match in `planes` get NA in the plane columns.
# `relationship = "many-to-one"` makes dplyr raise an error if a tail number
# ever matches more than one `planes` row, instead of silently duplicating
# flights.
flights_with_planes <- flights_lga_xna %>%
  left_join(planes, by = join_by(tailnum), relationship = "many-to-one")

# NOTE(review): this table has one row per flight, so the same tail number can
# appear several times. Add `distinct(tailnum, model, manufacturer)` if one row
# per airplane is wanted.
flights_with_planes %>%
  select(month, day, tailnum, model, manufacturer)
## # A tibble: 745 × 5
##    month   day tailnum model  manufacturer        
##    <int> <int> <chr>   <chr>  <chr>               
##  1     1     1 N722MQ  <NA>   <NA>                
##  2     1     1 N719MQ  <NA>   <NA>                
##  3     1     1 N739MQ  <NA>   <NA>                
##  4     1     2 N719MQ  <NA>   <NA>                
##  5     1     2 N711MQ  G1159B GULFSTREAM AEROSPACE
##  6     1     2 N723MQ  <NA>   <NA>                
##  7     1     3 N711MQ  G1159B GULFSTREAM AEROSPACE
##  8     1     3 N730MQ  <NA>   <NA>                
##  9     1     3 N722MQ  <NA>   <NA>                
## 10     1     4 N719MQ  <NA>   <NA>                
## # ℹ 735 more rows
# Preview the first rows of the filtered LGA -> XNA flights (all columns).
flights_lga_xna %>%
  head()
## # A tibble: 6 × 19
##    year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
## 1  2013     1     1      656            705        -9     1007            940
## 2  2013     1     1     1525           1530        -5     1934           1805
## 3  2013     1     1     1740           1745        -5     2158           2020
## 4  2013     1     2      656            705        -9     1014            940
## 5  2013     1     2     1531           1530         1     1846           1805
## 6  2013     1     2     1740           1745        -5     2035           2020
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

#Question 2

# 2. join  - Add the airline name to the flights table (1 POINT)

# Question 2: add the airline's full name to every flight.

# Look up each flight's carrier code in `airlines` to bring in the `name`
# column. The left join keeps every flight; `relationship = "many-to-one"`
# makes dplyr raise an error if a carrier code ever matches more than one
# `airlines` row, so the flight count cannot silently grow.
flights_with_airlines <- flights %>%
  left_join(airlines, by = join_by(carrier), relationship = "many-to-one")

# Show the identifying columns next to the airline name.
flights_with_airlines %>%
  select(year, month, day, carrier, name, tailnum, origin, dest)
## # A tibble: 336,776 × 8
##     year month   day carrier name                     tailnum origin dest 
##    <int> <int> <int> <chr>   <chr>                    <chr>   <chr>  <chr>
##  1  2013     1     1 UA      United Air Lines Inc.    N14228  EWR    IAH  
##  2  2013     1     1 UA      United Air Lines Inc.    N24211  LGA    IAH  
##  3  2013     1     1 AA      American Airlines Inc.   N619AA  JFK    MIA  
##  4  2013     1     1 B6      JetBlue Airways          N804JB  JFK    BQN  
##  5  2013     1     1 DL      Delta Air Lines Inc.     N668DN  LGA    ATL  
##  6  2013     1     1 UA      United Air Lines Inc.    N39463  EWR    ORD  
##  7  2013     1     1 B6      JetBlue Airways          N516JB  EWR    FLL  
##  8  2013     1     1 EV      ExpressJet Airlines Inc. N829AS  LGA    IAD  
##  9  2013     1     1 B6      JetBlue Airways          N593JB  JFK    MCO  
## 10  2013     1     1 AA      American Airlines Inc.   N3ALAA  LGA    ORD  
## # ℹ 336,766 more rows

#Question 3

# 3. join + select + distinct() - Which airports have no commercial flights (1 POINT)

# Question 3: airports that appear neither as an origin nor as a destination
# in `flights`.

# Every distinct (origin, dest) pair that occurs in `flights`.
airports_with_flights <- distinct(flights, origin, dest)

# Drop airports whose FAA code is used as an origin, then those used as a
# destination; what remains has no flights in either direction.
airports_no_flights <- airports %>%
  anti_join(airports_with_flights, by = join_by(faa == origin)) %>%
  anti_join(airports_with_flights, by = join_by(faa == dest))

# Show the code, name and coordinates of the remaining airports.
select(airports_no_flights, faa, name, lat, lon)
## # A tibble: 1,355 × 4
##    faa   name                             lat    lon
##    <chr> <chr>                          <dbl>  <dbl>
##  1 04G   Lansdowne Airport               41.1  -80.6
##  2 06A   Moton Field Municipal Airport   32.5  -85.7
##  3 06C   Schaumburg Regional             42.0  -88.1
##  4 06N   Randall Airport                 41.4  -74.4
##  5 09J   Jekyll Island Airport           31.1  -81.4
##  6 0A9   Elizabethton Municipal Airport  36.4  -82.2
##  7 0G6   Williams County Airport         41.5  -84.5
##  8 0G7   Finger Lakes Regional Airport   42.9  -76.8
##  9 0P2   Shoestring Aviation Airfield    39.8  -76.6
## 10 0S9   Jefferson County Intl           48.1 -123. 
## # ℹ 1,345 more rows

#Question 4

# 4. EXTRA CREDIT - (2 POINTS) - NO HELP - NO PARTIAL CREDIT
# Create a table with the names of the airports with the most 
# winds (wind_speed > 30). The table must contain only the airport 
# name (airports$name) and no duplicate rows

# Question 4: names of the airports with at least one weather observation
# where wind_speed exceeds 30.
airports_high_winds <- weather %>%
  filter(wind_speed > 30) %>%
  # One row per weather station (origin) before looking up the airport.
  distinct(origin) %>%
  inner_join(airports, by = join_by(origin == faa)) %>%
  # Keep only the airport name, with no duplicate rows.
  distinct(name)

# Print the resulting one-column table.
airports_high_winds
## # A tibble: 3 × 1
##   name               
##   <chr>              
## 1 Newark Liberty Intl
## 2 John F Kennedy Intl
## 3 La Guardia