#Exercises 13.2.1
#1 I would need the longitude and latitude for the origin and the destination airports of each flight.I would need to combine the flights and airports tables considering the flights table has the origin (origin) and destination (dest) airport for each flight, and the airports table has the longitude (lon) and latitude (lat) of each airport. I would combine with inner_join().
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("nycflights13")
## Warning: package 'nycflights13' was built under R version 4.4.3
variable.names(airports)
## [1] "faa" "name" "lat" "lon" "alt" "tz" "dst" "tzone"
flights_route <- flights %>%
inner_join(select(airports,
origin = faa,
origin_lat = lat,
origin_lon = lon),
by = "origin") %>%
inner_join(select(airports,
dest = faa,
dest_lat = lat,
dest_lon = lon),
by = "dest") %>%
select("origin", "dest", "origin_lat", "origin_lon", "dest_lat", "dest_lon")
flights_route
## # A tibble: 329,174 × 6
## origin dest origin_lat origin_lon dest_lat dest_lon
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 EWR IAH 40.7 -74.2 30.0 -95.3
## 2 LGA IAH 40.8 -73.9 30.0 -95.3
## 3 JFK MIA 40.6 -73.8 25.8 -80.3
## 4 LGA ATL 40.8 -73.9 33.6 -84.4
## 5 EWR ORD 40.7 -74.2 42.0 -87.9
## 6 EWR FLL 40.7 -74.2 26.1 -80.2
## 7 LGA IAD 40.8 -73.9 38.9 -77.5
## 8 JFK MCO 40.6 -73.8 28.4 -81.3
## 9 LGA ORD 40.8 -73.9 42.0 -87.9
## 10 JFK PBI 40.6 -73.8 26.7 -80.1
## # ℹ 329,164 more rows
#2 There is connection between origin in the weather table and origin that can be linked to faa(foreign key) in the airports table. The column airports\(faa is a foreign key of weather\)origin.
variable.names(airports)
## [1] "faa" "name" "lat" "lon" "alt" "tz" "dst" "tzone"
variable.names(weather)
## [1] "origin" "year" "month" "day" "hour"
## [6] "temp" "dewp" "humid" "wind_dir" "wind_speed"
## [11] "wind_gust" "precip" "pressure" "visib" "time_hour"
#3 The keys which are year, month, day, hour, origin from the weather table are the foreign keys, and the keys which are year, month, day, hour, dest from the flights table are the primary keys. That would also match the key connecting weather with destinations.
#4 There are off-seasons and peak seasons for flight trips. Usually, people travel a lot during summer break or special holidays since the kids are out of school and family trips are easier to plan. Not many people travel during the school semester. The off-season is usually from February to May, and among them, only weekdays are grouped to assign a new table, and the new table and the existing table are joined.
#13.3.1 Exercises
#1 Adding a surrogate key to flights
flights %>%
arrange(year, month, day, sched_dep_time, carrier, flight) %>%
mutate(flight_sur = row_number()) %>%
glimpse()
## Rows: 336,776
## Columns: 20
## $ year <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ day <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ dep_time <int> 517, 533, 542, 544, 554, 559, 558, 559, 558, 558, 557, …
## $ sched_dep_time <int> 515, 529, 540, 545, 558, 559, 600, 600, 600, 600, 600, …
## $ dep_delay <dbl> 2, 4, 2, -1, -4, 0, -2, -1, -2, -2, -3, NA, 1, 0, -5, -…
## $ arr_time <int> 830, 850, 923, 1004, 740, 702, 753, 941, 849, 853, 838,…
## $ sched_arr_time <int> 819, 830, 850, 1022, 728, 706, 745, 910, 851, 856, 846,…
## $ arr_delay <dbl> 11, 20, 33, -18, 12, -4, 8, 31, -2, -3, -8, NA, -6, -7,…
## $ carrier <chr> "UA", "UA", "AA", "B6", "UA", "B6", "AA", "AA", "B6", "…
## $ flight <int> 1545, 1714, 1141, 725, 1696, 1806, 301, 707, 49, 71, 79…
## $ tailnum <chr> "N14228", "N24211", "N619AA", "N804JB", "N39463", "N708…
## $ origin <chr> "EWR", "LGA", "JFK", "JFK", "EWR", "JFK", "LGA", "LGA",…
## $ dest <chr> "IAH", "IAH", "MIA", "BQN", "ORD", "BOS", "ORD", "DFW",…
## $ air_time <dbl> 227, 227, 160, 183, 150, 44, 138, 257, 149, 158, 140, N…
## $ distance <dbl> 1400, 1416, 1089, 1576, 719, 187, 733, 1389, 1028, 1005…
## $ hour <dbl> 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6…
## $ minute <dbl> 15, 29, 40, 45, 58, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ time_hour <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 0…
## $ flight_sur <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …
#2 The primary key is (playerID).
library(Lahman)
Lahman::Batting %>%
count(playerID) %>%
filter(n > 1) %>%
nrow()
## [1] 15422
#The primary keys are (year, sex, name).
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("babynames")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'babynames' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(tidyverse)
library(babynames)
## Warning: package 'babynames' was built under R version 4.4.3
babynames::babynames %>%
count(year, sex, name) %>%
filter(n > 1) %>%
nrow()
## [1] 0
#The primary keys are (lat, long, year, month).
install.packages("nasaweather")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'nasaweather' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(nasaweather)
## Warning: package 'nasaweather' was built under R version 4.4.3
##
## Attaching package: 'nasaweather'
## The following object is masked from 'package:dplyr':
##
## storms
nasaweather::atmos %>%
count(lat, long, year, month) %>%
filter(n > 1) %>%
nrow()
## [1] 0
#The primary key is (id).
install.packages("fueleconomy")
## Installing package into 'C:/Users/jacin/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'fueleconomy' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jacin\AppData\Local\Temp\Rtmpuo3svr\downloaded_packages
library(fueleconomy)
## Warning: package 'fueleconomy' was built under R version 4.4.3
fueleconomy::vehicles %>%
count(id) %>%
filter(n > 1) %>%
nrow()
## [1] 0
#The primary keys are (carat, cut, color, clarity).
library(ggplot2)
ggplot2::diamonds %>%
count(carat, cut, color, clarity) %>%
filter(n > 1) %>%
nrow()
## [1] 8080
#3. Lahman::Batting and Lahman::Master are combined by playerID #Lahman::Batting and Lahman::Salaries are combined by playerID, yearID #Lahman::Master and Lahman::Salaries are combined by playerID #Lahman::Master and Lahman::Managers are combined by playerID #Lahman::Master and Lahman::AwardsManagers are combined by playerID #All of which are connected by playerID, yearID, stint
#13.4.6
#1
library(maps)
## Warning: package 'maps' was built under R version 4.4.3
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
airports %>%
semi_join(flights, c("faa" = "dest")) %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point() +
coord_quickmap()
avg_delay <- flights %>%
group_by(dest) %>%
summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
inner_join(airports, by = c(dest = "faa"))
avg_delay %>%
ggplot(aes(lon, lat, color = delay)) + #average delay denoted by legend colors
borders("state") +
geom_point() +
coord_quickmap()
#2
location <- airports %>%
select(lat, lon, faa)
location
## # A tibble: 1,458 × 3
## lat lon faa
## <dbl> <dbl> <chr>
## 1 41.1 -80.6 04G
## 2 32.5 -85.7 06A
## 3 42.0 -88.1 06C
## 4 41.4 -74.4 06N
## 5 31.1 -81.4 09J
## 6 36.4 -82.2 0A9
## 7 41.5 -84.5 0G6
## 8 42.9 -76.8 0G7
## 9 39.8 -76.6 0P2
## 10 48.1 -123. 0S9
## # ℹ 1,448 more rows
flights %>%
select(origin, dest) %>%
inner_join(location, by = c(origin = "faa")) %>%
inner_join(location, by = c(dest = "faa"))
## # A tibble: 329,174 × 6
## origin dest lat.x lon.x lat.y lon.y
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 EWR IAH 40.7 -74.2 30.0 -95.3
## 2 LGA IAH 40.8 -73.9 30.0 -95.3
## 3 JFK MIA 40.6 -73.8 25.8 -80.3
## 4 LGA ATL 40.8 -73.9 33.6 -84.4
## 5 EWR ORD 40.7 -74.2 42.0 -87.9
## 6 EWR FLL 40.7 -74.2 26.1 -80.2
## 7 LGA IAD 40.8 -73.9 38.9 -77.5
## 8 JFK MCO 40.6 -73.8 28.4 -81.3
## 9 LGA ORD 40.8 -73.9 42.0 -87.9
## 10 JFK PBI 40.6 -73.8 26.7 -80.1
## # ℹ 329,164 more rows
#3
plane_age <- planes %>%
select(year, tailnum) %>%
mutate(age = 2021 - year)
plane_age
## # A tibble: 3,322 × 3
## year tailnum age
## <int> <chr> <dbl>
## 1 2004 N10156 17
## 2 1998 N102UW 23
## 3 1999 N103US 22
## 4 1999 N104UW 22
## 5 2002 N10575 19
## 6 1999 N105UW 22
## 7 1999 N107US 22
## 8 1999 N108UW 22
## 9 1999 N109UW 22
## 10 1999 N110UW 22
## # ℹ 3,312 more rows
relation <- flights %>%
select(tailnum, arr_delay) %>%
group_by(tailnum) %>%
summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
inner_join(plane_age, by = c(tailnum = "tailnum"))
relation
## # A tibble: 3,322 × 4
## tailnum delay year age
## <chr> <dbl> <int> <dbl>
## 1 N10156 12.7 2004 17
## 2 N102UW 2.94 1998 23
## 3 N103US -6.93 1999 22
## 4 N104UW 1.80 1999 22
## 5 N10575 20.7 2002 19
## 6 N105UW -0.267 1999 22
## 7 N107US -5.73 1999 22
## 8 N108UW -1.25 1999 22
## 9 N109UW -2.52 1999 22
## 10 N110UW 2.8 1999 22
## # ℹ 3,312 more rows
relation %>%
ggplot(aes(x = age, y = delay)) +
geom_point(color = "darkgreen") +
xlim(0, 40) + #created a range
ylim(0, 400)
## Warning: Removed 940 rows containing missing values or values outside the scale range
## (`geom_point()`).
#Looks as though planes that are roughly 10 to 20 years old have slightly higher delays.
#4 If it is too cold or too hot, the plane tends to be delayed based on the plot.
weather_relation <- flights %>%
inner_join(weather, by = c(origin = "origin",
year = "year",
month = "month",
day = "day",
hour = "hour"))
weather_relation
## # A tibble: 335,220 × 29
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 335,210 more rows
## # ℹ 21 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour.x <dttm>, temp <dbl>, dewp <dbl>,
## # humid <dbl>, wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
## # precip <dbl>, pressure <dbl>, visib <dbl>, time_hour.y <dttm>
weather_relation %>%
group_by(temp) %>%
summarise(weather_delay = mean(arr_delay, na.rm = TRUE)) %>%
ggplot(aes(x = temp, y = weather_delay)) +
geom_point(color = "darkgreen") +
geom_line(color = "green")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
#5 The large light blue bubble stands out in the NC,TN, KY area, and many planes in the area appear to have a delay of more than 150 minutes. After searching on Google, there was a really bad storm that day.
June_13_2013 <- flights %>%
filter(year == 2013, month == 6, day == 13) %>%
inner_join(weather, by = c(origin = "origin",
year = "year",
month = "month",
day = "day"))
## Warning in inner_join(., weather, by = c(origin = "origin", year = "year", : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 12610 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
June_13_2013
## # A tibble: 23,736 × 30
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 6 13 3 2255 68 120 19
## 2 2013 6 13 3 2255 68 120 19
## 3 2013 6 13 3 2255 68 120 19
## 4 2013 6 13 3 2255 68 120 19
## 5 2013 6 13 3 2255 68 120 19
## 6 2013 6 13 3 2255 68 120 19
## 7 2013 6 13 3 2255 68 120 19
## 8 2013 6 13 3 2255 68 120 19
## 9 2013 6 13 3 2255 68 120 19
## 10 2013 6 13 3 2255 68 120 19
## # ℹ 23,726 more rows
## # ℹ 22 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour.x <dbl>, minute <dbl>, time_hour.x <dttm>, hour.y <int>, temp <dbl>,
## # dewp <dbl>, humid <dbl>, wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
## # precip <dbl>, pressure <dbl>, visib <dbl>, time_hour.y <dttm>
June_13_2013 %>%
group_by(dest) %>%
summarise(June_13_delay = mean(arr_delay, na.rm = TRUE)) %>%
inner_join(airports, by = c(dest = "faa")) %>%
ggplot(aes(y = lat, x = lon)) +
borders("state") +
geom_point(aes(size = June_13_delay, color = June_13_delay)) +
coord_quickmap()
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
#13.5.1 Exercises
#1 There are 2,512 missing tailnum data in the flight table. In a missing of tailnum the dep_time, dep_delay, arr_time, arr_delay, and air_time are all missing. Possibly meaning that there was a cancellation of the flight.
flights %>%
filter(is.na(tailnum))
## # A tibble: 2,512 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 2 NA 1545 NA NA 1910
## 2 2013 1 2 NA 1601 NA NA 1735
## 3 2013 1 3 NA 857 NA NA 1209
## 4 2013 1 3 NA 645 NA NA 952
## 5 2013 1 4 NA 845 NA NA 1015
## 6 2013 1 4 NA 1830 NA NA 2044
## 7 2013 1 5 NA 840 NA NA 1001
## 8 2013 1 7 NA 820 NA NA 958
## 9 2013 1 8 NA 1645 NA NA 1838
## 10 2013 1 9 NA 755 NA NA 1012
## # ℹ 2,502 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
#2
flights %>%
filter(!is.na(tailnum)) %>% # !is.na means "except missing tailnum data"
group_by(tailnum) %>%
count() %>%
filter(n >= 100)
## # A tibble: 1,217 × 2
## # Groups: tailnum [1,217]
## tailnum n
## <chr> <int>
## 1 N0EGMQ 371
## 2 N10156 153
## 3 N10575 289
## 4 N11106 129
## 5 N11107 148
## 6 N11109 148
## 7 N11113 138
## 8 N11119 148
## 9 N11121 154
## 10 N11127 124
## # ℹ 1,207 more rows
#3
variable.names(fueleconomy::vehicles)
## [1] "id" "make" "model" "year" "class" "trans" "drive" "cyl" "displ"
## [10] "fuel" "hwy" "cty"
variable.names(fueleconomy::common)
## [1] "make" "model" "n" "years"
fueleconomy::vehicles %>%
inner_join(fueleconomy::common, by = c(make = "make", model = "model")) %>%
select(make, model) %>%
group_by(model) %>%
count() %>%
arrange(desc(n))
## # A tibble: 342 × 2
## # Groups: model [342]
## model n
## <chr> <int>
## 1 F150 Pickup 2WD 200
## 2 Truck 2WD 187
## 3 F150 Pickup 4WD 178
## 4 Ranger Pickup 2WD 169
## 5 Mustang 153
## 6 Jetta 150
## 7 Sierra 1500 2WD 149
## 8 Sierra 1500 4WD 149
## 9 Civic 142
## 10 Eclipse 133
## # ℹ 332 more rows
#The most common model is F150 Pickup 2WD.
#4 I will find and visualize the worst delays related in the weather. Since the worst delay is 1,301 minutes which is only about ~22 hours.
flights %>%
select(arr_delay, dep_delay, year:day, hour, origin) %>%
arrange(desc(dep_delay)) %>%
inner_join(weather, by = c(year = "year",
month = "month",
day = "day",
hour = "hour",
origin = "origin")) %>%
ggplot(aes(x = wind_dir, y = precip, color = temp)) +
geom_point(shape = "diamond")
## Warning: Removed 8240 rows containing missing values or values outside the scale range
## (`geom_point()`).
#5 anti_join(flights, airports, by = c(“dest” = “faa”)) shows only 4 destinations which are BQN, PSE, SJU, and STT which are Aguadilla, Puerto Rico (BQN), Ponce, Puerto Rico (PSE), San Juan, Puerto Rico (SJU), and St. Thomas (STT).
anti_join(flights, airports, by = c("dest" = "faa"))%>%
count(dest)
## # A tibble: 4 × 2
## dest n
## <chr> <int>
## 1 BQN 896
## 2 PSE 365
## 3 SJU 5819
## 4 STT 522
anti_join(airports, flights, by = c("faa" = "dest")) %>%
count(faa)
## # A tibble: 1,357 × 2
## faa n
## <chr> <int>
## 1 04G 1
## 2 06A 1
## 3 06C 1
## 4 06N 1
## 5 09J 1
## 6 0A9 1
## 7 0G6 1
## 8 0G7 1
## 9 0P2 1
## 10 0S9 1
## # ℹ 1,347 more rows
#While anti_join(airports, flights, by = c(“faa” = “dest”)) shows every airport in the U.S.
#6 I expect that there’s an implicit relationship between plane and airline. Based on the data below.
nycflights13::planes
## # A tibble: 3,322 × 9
## tailnum year type manufacturer model engines seats speed engine
## <chr> <int> <chr> <chr> <chr> <int> <int> <int> <chr>
## 1 N10156 2004 Fixed wing multi… EMBRAER EMB-… 2 55 NA Turbo…
## 2 N102UW 1998 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 3 N103US 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 4 N104UW 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 5 N10575 2002 Fixed wing multi… EMBRAER EMB-… 2 55 NA Turbo…
## 6 N105UW 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 7 N107US 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 8 N108UW 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 9 N109UW 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## 10 N110UW 1999 Fixed wing multi… AIRBUS INDU… A320… 2 182 NA Turbo…
## # ℹ 3,312 more rows
nycflights13::airlines
## # A tibble: 16 × 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## 7 F9 Frontier Airlines Inc.
## 8 FL AirTran Airways Corporation
## 9 HA Hawaiian Airlines Inc.
## 10 MQ Envoy Air
## 11 OO SkyWest Airlines Inc.
## 12 UA United Air Lines Inc.
## 13 US US Airways Inc.
## 14 VX Virgin America
## 15 WN Southwest Airlines Co.
## 16 YV Mesa Airlines Inc.
nycflights13::flights
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
flights %>%
inner_join(planes, by = c(tailnum = "tailnum")) %>%
inner_join(airlines, by = c(carrier = "carrier")) %>%
select(tailnum, carrier, name)
## # A tibble: 284,170 × 3
## tailnum carrier name
## <chr> <chr> <chr>
## 1 N14228 UA United Air Lines Inc.
## 2 N24211 UA United Air Lines Inc.
## 3 N619AA AA American Airlines Inc.
## 4 N804JB B6 JetBlue Airways
## 5 N668DN DL Delta Air Lines Inc.
## 6 N39463 UA United Air Lines Inc.
## 7 N516JB B6 JetBlue Airways
## 8 N829AS EV ExpressJet Airlines Inc.
## 9 N593JB B6 JetBlue Airways
## 10 N793JB B6 JetBlue Airways
## # ℹ 284,160 more rows