library(nycflights13)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# use the below code to look at the data - DO NOT ADD VIEW STATEMENTS TO RMARKDOWN
View(flights)
View(airlines)
View(weather)
View(planes)
View(airports)
# 1. Join + filter - Which airplanes fly from LGA to XNA
# Joining 'flights' and 'planes' to get details about the airplanes (using 'tailnum')
# Filtering flights that go from LGA to XNA
lga_to_xna_planes <- flights %>%
  filter(origin == "LGA", dest == "XNA") %>%
  select(tailnum) %>%
  distinct() %>%
  inner_join(planes, by = "tailnum")
# View the result for airplanes flying from LGA to XNA
lga_to_xna_planes
## # A tibble: 4 × 9
##   tailnum  year type               manufacturer model engines seats speed engine
##   <chr>   <int> <chr>              <chr>        <chr>   <int> <int> <int> <chr> 
## 1 N711MQ   1976 Fixed wing multi … GULFSTREAM … G115…       2    22    NA Turbo…
## 2 N737MQ   1977 Fixed wing single… CESSNA       172N        1     4   105 Recip…
## 3 N840MQ   1974 Fixed wing multi … CANADAIR LTD CF-5D       4     2    NA Turbo…
## 4 N713EV   2003 Fixed wing multi … BOMBARDIER … CL-6…       2    80    NA Turbo…
# 2. join  - Add the airline name to the flights table (1 POINT)
# Joining 'flights' and 'airlines' using 'carrier' to get the airline names
flights_with_airline_names <- flights %>%
  inner_join(airlines, by = "carrier") %>%
  select(year, month, day, dep_time, arr_time, carrier, name)

# View the result with added airline names
flights_with_airline_names
## # A tibble: 336,776 × 7
##     year month   day dep_time arr_time carrier name                    
##    <int> <int> <int>    <int>    <int> <chr>   <chr>                   
##  1  2013     1     1      517      830 UA      United Air Lines Inc.   
##  2  2013     1     1      533      850 UA      United Air Lines Inc.   
##  3  2013     1     1      542      923 AA      American Airlines Inc.  
##  4  2013     1     1      544     1004 B6      JetBlue Airways         
##  5  2013     1     1      554      812 DL      Delta Air Lines Inc.    
##  6  2013     1     1      554      740 UA      United Air Lines Inc.   
##  7  2013     1     1      555      913 B6      JetBlue Airways         
##  8  2013     1     1      557      709 EV      ExpressJet Airlines Inc.
##  9  2013     1     1      557      838 B6      JetBlue Airways         
## 10  2013     1     1      558      753 AA      American Airlines Inc.  
## # ℹ 336,766 more rows
# 3. join + select + distinct() - Which airports have no commercial flights (1 POINT)
# Step 1: Get all airports that have no flights in the flights dataset
airports_with_no_flights <- airports %>%
  anti_join(flights, by = c("faa" = "origin")) %>%
  select(faa, name) %>%
  distinct()

# View the result for airports with no commercial flights
airports_with_no_flights
## # A tibble: 1,455 × 2
##    faa   name                          
##    <chr> <chr>                         
##  1 04G   Lansdowne Airport             
##  2 06A   Moton Field Municipal Airport 
##  3 06C   Schaumburg Regional           
##  4 06N   Randall Airport               
##  5 09J   Jekyll Island Airport         
##  6 0A9   Elizabethton Municipal Airport
##  7 0G6   Williams County Airport       
##  8 0G7   Finger Lakes Regional Airport 
##  9 0P2   Shoestring Aviation Airfield  
## 10 0S9   Jefferson County Intl         
## # ℹ 1,445 more rows
# 4. EXTRA CREDIT - (2 POINT2) - NO HELP - NO PARTIAL CREDIT
# Create a table with the names of the airports with the most 
# Join 'weather' and 'airports' based on the 'origin' column, then filter wind_speed > 30
# and select the airport name. Remove duplicates.
windy_airports <- weather %>%
  filter(wind_speed > 30) %>%
  inner_join(airports, by = c("origin" = "faa")) %>%
  select(name) %>%
  distinct()
# View the result for airports with wind speeds > 30
windy_airports
## # A tibble: 3 × 1
##   name               
##   <chr>              
## 1 Newark Liberty Intl
## 2 John F Kennedy Intl
## 3 La Guardia