#Exercises 13.2.1

#1 I would need the longitude and latitude for the origin and the destination airports of each flight.I would need to combine the flights and airports tables considering the flights table has the origin (origin) and destination (dest) airport for each flight, and the airports table has the longitude (lon) and latitude (lat) of each airport. I would combine with inner_join().

library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("nycflights13")
## Warning: package 'nycflights13' was built under R version 4.4.3
variable.names(airports)
## [1] "faa"   "name"  "lat"   "lon"   "alt"   "tz"    "dst"   "tzone"
flights_route <- flights %>%
  inner_join(select(airports,
                    origin = faa, 
                    origin_lat = lat, 
                    origin_lon = lon),
             by = "origin") %>%
  inner_join(select(airports,
                    dest = faa,
                    dest_lat = lat,
                    dest_lon = lon),
             by = "dest") %>%
  select("origin", "dest", "origin_lat", "origin_lon", "dest_lat", "dest_lon")

flights_route
## # A tibble: 329,174 × 6
##    origin dest  origin_lat origin_lon dest_lat dest_lon
##    <chr>  <chr>      <dbl>      <dbl>    <dbl>    <dbl>
##  1 EWR    IAH         40.7      -74.2     30.0    -95.3
##  2 LGA    IAH         40.8      -73.9     30.0    -95.3
##  3 JFK    MIA         40.6      -73.8     25.8    -80.3
##  4 LGA    ATL         40.8      -73.9     33.6    -84.4
##  5 EWR    ORD         40.7      -74.2     42.0    -87.9
##  6 EWR    FLL         40.7      -74.2     26.1    -80.2
##  7 LGA    IAD         40.8      -73.9     38.9    -77.5
##  8 JFK    MCO         40.6      -73.8     28.4    -81.3
##  9 LGA    ORD         40.8      -73.9     42.0    -87.9
## 10 JFK    PBI         40.6      -73.8     26.7    -80.1
## # ℹ 329,164 more rows

#2 There is connection between origin in the weather table and origin that can be linked to faa(foreign key) in the airports table. The column airports\(faa is a foreign key of weather\)origin.

variable.names(airports)
## [1] "faa"   "name"  "lat"   "lon"   "alt"   "tz"    "dst"   "tzone"
variable.names(weather)
##  [1] "origin"     "year"       "month"      "day"        "hour"      
##  [6] "temp"       "dewp"       "humid"      "wind_dir"   "wind_speed"
## [11] "wind_gust"  "precip"     "pressure"   "visib"      "time_hour"

#3 The keys which are year, month, day, hour, origin from the weather table are the foreign keys, and the keys which are year, month, day, hour, dest from the flights table are the primary keys. That would also match the key connecting weather with destinations.

#4 There are off-seasons and peak seasons for flight trips. Usually, people travel a lot during summer break or special holidays since the kids are out of school and family trips are easier to plan. Not many people travel during the school semester. The off-season is usually from February to May, and among them, only weekdays are grouped to assign a new table, and the new table and the existing table are joined.

#13.3.1 Exercises

#1 Adding a surrogate key to flights

flights %>%
  arrange(year, month, day, sched_dep_time, carrier, flight) %>%
  mutate(flight_sur = row_number()) %>%
  glimpse()
## Rows: 336,776
## Columns: 20
## $ year           <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
## $ month          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ day            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ dep_time       <int> 517, 533, 542, 544, 554, 559, 558, 559, 558, 558, 557, …
## $ sched_dep_time <int> 515, 529, 540, 545, 558, 559, 600, 600, 600, 600, 600, …
## $ dep_delay      <dbl> 2, 4, 2, -1, -4, 0, -2, -1, -2, -2, -3, NA, 1, 0, -5, -…
## $ arr_time       <int> 830, 850, 923, 1004, 740, 702, 753, 941, 849, 853, 838,…
## $ sched_arr_time <int> 819, 830, 850, 1022, 728, 706, 745, 910, 851, 856, 846,…
## $ arr_delay      <dbl> 11, 20, 33, -18, 12, -4, 8, 31, -2, -3, -8, NA, -6, -7,…
## $ carrier        <chr> "UA", "UA", "AA", "B6", "UA", "B6", "AA", "AA", "B6", "…
## $ flight         <int> 1545, 1714, 1141, 725, 1696, 1806, 301, 707, 49, 71, 79…
## $ tailnum        <chr> "N14228", "N24211", "N619AA", "N804JB", "N39463", "N708…
## $ origin         <chr> "EWR", "LGA", "JFK", "JFK", "EWR", "JFK", "LGA", "LGA",…
## $ dest           <chr> "IAH", "IAH", "MIA", "BQN", "ORD", "BOS", "ORD", "DFW",…
## $ air_time       <dbl> 227, 227, 160, 183, 150, 44, 138, 257, 149, 158, 140, N…
## $ distance       <dbl> 1400, 1416, 1089, 1576, 719, 187, 733, 1389, 1028, 1005…
## $ hour           <dbl> 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6…
## $ minute         <dbl> 15, 29, 40, 45, 58, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ time_hour      <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 0…
## $ flight_sur     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …

#2 The primary key is (playerID).

library(Lahman)
Lahman::Batting %>%
  count(playerID) %>%
  filter(n > 1) %>%
  nrow()
## [1] 15422

#The primary keys are (year, sex, name).

options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("babynames")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'babynames' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(tidyverse)
library(babynames)
## Warning: package 'babynames' was built under R version 4.4.3
babynames::babynames %>%
  count(year, sex, name) %>%
  filter(n > 1) %>%
  nrow()
## [1] 0

#The primary keys are (lat, long, year, month).

install.packages("nasaweather")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'nasaweather' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(nasaweather)
## Warning: package 'nasaweather' was built under R version 4.4.3
## 
## Attaching package: 'nasaweather'
## The following object is masked from 'package:dplyr':
## 
##     storms
nasaweather::atmos %>%
  count(lat, long, year, month) %>%
  filter(n > 1) %>%
  nrow()
## [1] 0

#The primary key is (id).

install.packages("fueleconomy")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'fueleconomy' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(fueleconomy)
## Warning: package 'fueleconomy' was built under R version 4.4.3
fueleconomy::vehicles %>%
  count(id) %>%
  filter(n > 1) %>%
  nrow()
## [1] 0

#The primary keys are (carat, cut, color, clarity).

library(ggplot2)
ggplot2::diamonds %>%
  count(carat, cut, color, clarity) %>%
  filter(n > 1) %>%
  nrow()
## [1] 8080

#3. Lahman::Batting and Lahman::Master are combined by playerID #Lahman::Batting and Lahman::Salaries are combined by playerID, yearID #Lahman::Master and Lahman::Salaries are combined by playerID #Lahman::Master and Lahman::Managers are combined by playerID #Lahman::Master and Lahman::AwardsManagers are combined by playerID #All of which are connected by playerID, yearID, stint

#13.4.6

#1

library(maps)
## Warning: package 'maps' was built under R version 4.4.3
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
airports %>%
  semi_join(flights, c("faa" = "dest")) %>%
  ggplot(aes(lon, lat)) +
    borders("state") +
    geom_point() +
    coord_quickmap()

avg_delay <- flights %>%
  group_by(dest) %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
  inner_join(airports, by = c(dest = "faa"))

avg_delay %>%
  ggplot(aes(lon, lat, color = delay)) + #average delay denoted by legend colors
  borders("state") +
  geom_point() +
  coord_quickmap()

#2

location <- airports %>%
  select(lat, lon, faa)
location
## # A tibble: 1,458 × 3
##      lat    lon faa  
##    <dbl>  <dbl> <chr>
##  1  41.1  -80.6 04G  
##  2  32.5  -85.7 06A  
##  3  42.0  -88.1 06C  
##  4  41.4  -74.4 06N  
##  5  31.1  -81.4 09J  
##  6  36.4  -82.2 0A9  
##  7  41.5  -84.5 0G6  
##  8  42.9  -76.8 0G7  
##  9  39.8  -76.6 0P2  
## 10  48.1 -123.  0S9  
## # ℹ 1,448 more rows
flights %>%
  select(origin, dest) %>%
  inner_join(location, by = c(origin = "faa")) %>%
  inner_join(location, by = c(dest = "faa"))
## # A tibble: 329,174 × 6
##    origin dest  lat.x lon.x lat.y lon.y
##    <chr>  <chr> <dbl> <dbl> <dbl> <dbl>
##  1 EWR    IAH    40.7 -74.2  30.0 -95.3
##  2 LGA    IAH    40.8 -73.9  30.0 -95.3
##  3 JFK    MIA    40.6 -73.8  25.8 -80.3
##  4 LGA    ATL    40.8 -73.9  33.6 -84.4
##  5 EWR    ORD    40.7 -74.2  42.0 -87.9
##  6 EWR    FLL    40.7 -74.2  26.1 -80.2
##  7 LGA    IAD    40.8 -73.9  38.9 -77.5
##  8 JFK    MCO    40.6 -73.8  28.4 -81.3
##  9 LGA    ORD    40.8 -73.9  42.0 -87.9
## 10 JFK    PBI    40.6 -73.8  26.7 -80.1
## # ℹ 329,164 more rows

#3

plane_age <- planes %>%
  select(year, tailnum) %>%
  mutate(age =  2021 - year)
plane_age
## # A tibble: 3,322 × 3
##     year tailnum   age
##    <int> <chr>   <dbl>
##  1  2004 N10156     17
##  2  1998 N102UW     23
##  3  1999 N103US     22
##  4  1999 N104UW     22
##  5  2002 N10575     19
##  6  1999 N105UW     22
##  7  1999 N107US     22
##  8  1999 N108UW     22
##  9  1999 N109UW     22
## 10  1999 N110UW     22
## # ℹ 3,312 more rows
relation <- flights %>%
  select(tailnum, arr_delay) %>%
  group_by(tailnum) %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
  inner_join(plane_age, by = c(tailnum = "tailnum"))
relation
## # A tibble: 3,322 × 4
##    tailnum  delay  year   age
##    <chr>    <dbl> <int> <dbl>
##  1 N10156  12.7    2004    17
##  2 N102UW   2.94   1998    23
##  3 N103US  -6.93   1999    22
##  4 N104UW   1.80   1999    22
##  5 N10575  20.7    2002    19
##  6 N105UW  -0.267  1999    22
##  7 N107US  -5.73   1999    22
##  8 N108UW  -1.25   1999    22
##  9 N109UW  -2.52   1999    22
## 10 N110UW   2.8    1999    22
## # ℹ 3,312 more rows
relation %>%
  ggplot(aes(x = age, y = delay)) +
  geom_point(color = "darkgreen") +
  xlim(0, 40) + #created a range
  ylim(0, 400)
## Warning: Removed 940 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Looks as though planes that are roughly 10 to 20 years old have slightly higher delays.

#4 If it is too cold or too hot, the plane tends to be delayed based on the plot.

weather_relation <- flights %>%
  inner_join(weather, by = c(origin = "origin",
    year = "year",
    month = "month",
    day = "day",
    hour = "hour"))
weather_relation
## # A tibble: 335,220 × 29
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 335,210 more rows
## # ℹ 21 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour.x <dttm>, temp <dbl>, dewp <dbl>,
## #   humid <dbl>, wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
## #   precip <dbl>, pressure <dbl>, visib <dbl>, time_hour.y <dttm>
weather_relation %>%
  group_by(temp) %>%
  summarise(weather_delay = mean(arr_delay, na.rm = TRUE)) %>%
  ggplot(aes(x = temp, y = weather_delay)) +
  geom_point(color = "darkgreen") + 
  geom_line(color = "green")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

#5 The large light blue bubble stands out in the NC,TN, KY area, and many planes in the area appear to have a delay of more than 150 minutes. After searching on Google, there was a really bad storm that day.

June_13_2013 <- flights %>%
  filter(year == 2013, month == 6, day == 13) %>%
  inner_join(weather, by = c(origin = "origin",
    year = "year",
    month = "month",
    day = "day"))
## Warning in inner_join(., weather, by = c(origin = "origin", year = "year", : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 12610 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
June_13_2013
## # A tibble: 23,736 × 30
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     6    13        3           2255        68      120             19
##  2  2013     6    13        3           2255        68      120             19
##  3  2013     6    13        3           2255        68      120             19
##  4  2013     6    13        3           2255        68      120             19
##  5  2013     6    13        3           2255        68      120             19
##  6  2013     6    13        3           2255        68      120             19
##  7  2013     6    13        3           2255        68      120             19
##  8  2013     6    13        3           2255        68      120             19
##  9  2013     6    13        3           2255        68      120             19
## 10  2013     6    13        3           2255        68      120             19
## # ℹ 23,726 more rows
## # ℹ 22 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour.x <dbl>, minute <dbl>, time_hour.x <dttm>, hour.y <int>, temp <dbl>,
## #   dewp <dbl>, humid <dbl>, wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
## #   precip <dbl>, pressure <dbl>, visib <dbl>, time_hour.y <dttm>
June_13_2013 %>%
  group_by(dest) %>%
  summarise(June_13_delay = mean(arr_delay, na.rm = TRUE)) %>%
  inner_join(airports, by = c(dest = "faa")) %>%
  ggplot(aes(y = lat, x = lon)) +
  borders("state") +
  geom_point(aes(size = June_13_delay, color = June_13_delay)) +
  coord_quickmap()
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

#13.5.1 Exercises

#1 There are 2,512 missing tailnum data in the flight table. In a missing of tailnum the dep_time, dep_delay, arr_time, arr_delay, and air_time are all missing. Possibly meaning that there was a cancellation of the flight.

flights %>%
  filter(is.na(tailnum))
## # A tibble: 2,512 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     2       NA           1545        NA       NA           1910
##  2  2013     1     2       NA           1601        NA       NA           1735
##  3  2013     1     3       NA            857        NA       NA           1209
##  4  2013     1     3       NA            645        NA       NA            952
##  5  2013     1     4       NA            845        NA       NA           1015
##  6  2013     1     4       NA           1830        NA       NA           2044
##  7  2013     1     5       NA            840        NA       NA           1001
##  8  2013     1     7       NA            820        NA       NA            958
##  9  2013     1     8       NA           1645        NA       NA           1838
## 10  2013     1     9       NA            755        NA       NA           1012
## # ℹ 2,502 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

#2

flights %>%
  filter(!is.na(tailnum)) %>%     # !is.na means "except missing tailnum data"
  group_by(tailnum) %>%
  count() %>%
  filter(n >= 100)
## # A tibble: 1,217 × 2
## # Groups:   tailnum [1,217]
##    tailnum     n
##    <chr>   <int>
##  1 N0EGMQ    371
##  2 N10156    153
##  3 N10575    289
##  4 N11106    129
##  5 N11107    148
##  6 N11109    148
##  7 N11113    138
##  8 N11119    148
##  9 N11121    154
## 10 N11127    124
## # ℹ 1,207 more rows

#3

variable.names(fueleconomy::vehicles)
##  [1] "id"    "make"  "model" "year"  "class" "trans" "drive" "cyl"   "displ"
## [10] "fuel"  "hwy"   "cty"
variable.names(fueleconomy::common)
## [1] "make"  "model" "n"     "years"
fueleconomy::vehicles %>%
  inner_join(fueleconomy::common, by = c(make = "make", model = "model")) %>%
  select(make, model) %>%
  group_by(model) %>%
  count() %>%
  arrange(desc(n))
## # A tibble: 342 × 2
## # Groups:   model [342]
##    model                 n
##    <chr>             <int>
##  1 F150 Pickup 2WD     200
##  2 Truck 2WD           187
##  3 F150 Pickup 4WD     178
##  4 Ranger Pickup 2WD   169
##  5 Mustang             153
##  6 Jetta               150
##  7 Sierra 1500 2WD     149
##  8 Sierra 1500 4WD     149
##  9 Civic               142
## 10 Eclipse             133
## # ℹ 332 more rows

#The most common model is F150 Pickup 2WD.

#4 I will find and visualize the worst delays related in the weather. Since the worst delay is 1,301 minutes which is only about ~22 hours.

flights %>%
  select(arr_delay, dep_delay, year:day, hour, origin) %>%
  arrange(desc(dep_delay)) %>%
  inner_join(weather, by = c(year = "year",
                             month = "month",
                             day = "day",
                             hour = "hour",
                             origin = "origin")) %>%
  ggplot(aes(x = wind_dir, y = precip, color = temp)) +
  geom_point(shape = "diamond")
## Warning: Removed 8240 rows containing missing values or values outside the scale range
## (`geom_point()`).

#5 anti_join(flights, airports, by = c(“dest” = “faa”)) shows only 4 destinations which are BQN, PSE, SJU, and STT which are Aguadilla, Puerto Rico (BQN), Ponce, Puerto Rico (PSE), San Juan, Puerto Rico (SJU), and St. Thomas (STT).

anti_join(flights, airports, by = c("dest" = "faa"))%>%
  count(dest) 
## # A tibble: 4 × 2
##   dest      n
##   <chr> <int>
## 1 BQN     896
## 2 PSE     365
## 3 SJU    5819
## 4 STT     522
anti_join(airports, flights, by = c("faa" = "dest")) %>%
  count(faa)
## # A tibble: 1,357 × 2
##    faa       n
##    <chr> <int>
##  1 04G       1
##  2 06A       1
##  3 06C       1
##  4 06N       1
##  5 09J       1
##  6 0A9       1
##  7 0G6       1
##  8 0G7       1
##  9 0P2       1
## 10 0S9       1
## # ℹ 1,347 more rows

#While anti_join(airports, flights, by = c(“faa” = “dest”)) shows every airport in the U.S.

#6 I expect that there’s an implicit relationship between plane and airline. Based on the data below.

nycflights13::planes 
## # A tibble: 3,322 × 9
##    tailnum  year type              manufacturer model engines seats speed engine
##    <chr>   <int> <chr>             <chr>        <chr>   <int> <int> <int> <chr> 
##  1 N10156   2004 Fixed wing multi… EMBRAER      EMB-…       2    55    NA Turbo…
##  2 N102UW   1998 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  3 N103US   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  4 N104UW   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  5 N10575   2002 Fixed wing multi… EMBRAER      EMB-…       2    55    NA Turbo…
##  6 N105UW   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  7 N107US   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  8 N108UW   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
##  9 N109UW   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
## 10 N110UW   1999 Fixed wing multi… AIRBUS INDU… A320…       2   182    NA Turbo…
## # ℹ 3,312 more rows
nycflights13::airlines
## # A tibble: 16 × 2
##    carrier name                       
##    <chr>   <chr>                      
##  1 9E      Endeavor Air Inc.          
##  2 AA      American Airlines Inc.     
##  3 AS      Alaska Airlines Inc.       
##  4 B6      JetBlue Airways            
##  5 DL      Delta Air Lines Inc.       
##  6 EV      ExpressJet Airlines Inc.   
##  7 F9      Frontier Airlines Inc.     
##  8 FL      AirTran Airways Corporation
##  9 HA      Hawaiian Airlines Inc.     
## 10 MQ      Envoy Air                  
## 11 OO      SkyWest Airlines Inc.      
## 12 UA      United Air Lines Inc.      
## 13 US      US Airways Inc.            
## 14 VX      Virgin America             
## 15 WN      Southwest Airlines Co.     
## 16 YV      Mesa Airlines Inc.
nycflights13::flights
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
flights %>%
  inner_join(planes, by = c(tailnum = "tailnum")) %>%
  inner_join(airlines, by = c(carrier = "carrier")) %>%
  select(tailnum, carrier, name)
## # A tibble: 284,170 × 3
##    tailnum carrier name                    
##    <chr>   <chr>   <chr>                   
##  1 N14228  UA      United Air Lines Inc.   
##  2 N24211  UA      United Air Lines Inc.   
##  3 N619AA  AA      American Airlines Inc.  
##  4 N804JB  B6      JetBlue Airways         
##  5 N668DN  DL      Delta Air Lines Inc.    
##  6 N39463  UA      United Air Lines Inc.   
##  7 N516JB  B6      JetBlue Airways         
##  8 N829AS  EV      ExpressJet Airlines Inc.
##  9 N593JB  B6      JetBlue Airways         
## 10 N793JB  B6      JetBlue Airways         
## # ℹ 284,160 more rows