Here is an answer to Question 1. The anti-join shows all flights whose destination airport code doesnt exist in the dataset.
anti_join(flights, airports, by = c("dest" = "faa"))
This anti-join shows all airports in the airports dataset that never appear in the flights dataset.
anti_join(airports, flights, by = c("faa" = "dest"))
The day in 2013 with the longest average total delay and total departure delay is March 8th.
delay_by_day <- flights %>% mutate(date = make_date(year, month, day), total_delay = arr_delay + dep_delay) %>% group_by(date) %>% summarize(avg_total_delay = mean(total_delay, na.rm = TRUE)) %>% arrange(desc(avg_total_delay))
delay_by_day %>% slice(1)
glimpse(weather)
## Rows: 26,115
## Columns: 15
## $ origin <chr> "EWR", "EWR", "EWR", "EWR", "EWR", "EWR", "EWR", "EWR", "EW…
## $ year <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013,…
## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ day <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ hour <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, …
## $ temp <dbl> 39.02, 39.02, 39.02, 39.92, 39.02, 37.94, 39.02, 39.92, 39.…
## $ dewp <dbl> 26.06, 26.96, 28.04, 28.04, 28.04, 28.04, 28.04, 28.04, 28.…
## $ humid <dbl> 59.37, 61.63, 64.43, 62.21, 64.43, 67.21, 64.43, 62.21, 62.…
## $ wind_dir <dbl> 270, 250, 240, 250, 260, 240, 240, 250, 260, 260, 260, 330,…
## $ wind_speed <dbl> 10.35702, 8.05546, 11.50780, 12.65858, 12.65858, 11.50780, …
## $ wind_gust <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 20.…
## $ precip <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ pressure <dbl> 1012.0, 1012.3, 1012.5, 1012.2, 1011.9, 1012.4, 1012.2, 101…
## $ visib <dbl> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,…
## $ time_hour <dttm> 2013-01-01 01:00:00, 2013-01-01 02:00:00, 2013-01-01 03:00…
Using planes and flights, the airplanes with the fastest average speed were the large commercial jets which showed the highest speed in the dataset.
plane_speed <- flights %>% filter(!is.na(air_time), air_time
> 0) %>% mutate(speed = distance / (air_time / 60)) %>%
inner_join(planes, by = “tailnum”) %>% group_by(tailnum, model)
%>% summarize(avg_speed = mean(speed, na.rm = TRUE), .groups =
“drop”) %>% arrange(desc(avg_speed))
plane_speed %>% slice(1:10)
library(dplyr) library(nycflights13)
plane_speed <- flights %>% filter(!is.na(air_time), air_time
> 0) %>% mutate(speed = distance / (air_time / 60)) %>%
inner_join(planes, by = “tailnum”) %>% group_by(tailnum, model)
%>% summarize(avg_speed = mean(speed, na.rm = TRUE), .groups =
“drop”) %>% arrange(desc(avg_speed))
plane_speed %>% slice(1:10)
library(dplyr) library(nycflights13)
plane_speed <- flights %>% # join first, keeping only planes that exist in planes table inner_join(planes %>% select(tailnum, model), by = “tailnum”) %>% # filter after the join filter(!is.na(air_time), air_time > 0) %>% # compute speed safely mutate(speed = distance / (air_time / 60)) %>% # compute average speed per tail number + model group_by(tailnum, model) %>% summarize(avg_speed = mean(speed, na.rm = TRUE), .groups = “drop”) %>% arrange(desc(avg_speed))
plane_speed %>% slice(1:10)
glimpse(planes)
## Rows: 3,322
## Columns: 9
## $ tailnum <chr> "N10156", "N102UW", "N103US", "N104UW", "N10575", "N105UW…
## $ year <int> 2004, 1998, 1999, 1999, 2002, 1999, 1999, 1999, 1999, 199…
## $ type <chr> "Fixed wing multi engine", "Fixed wing multi engine", "Fi…
## $ manufacturer <chr> "EMBRAER", "AIRBUS INDUSTRIE", "AIRBUS INDUSTRIE", "AIRBU…
## $ model <chr> "EMB-145XR", "A320-214", "A320-214", "A320-214", "EMB-145…
## $ engines <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ seats <int> 55, 182, 182, 182, 55, 182, 182, 182, 182, 182, 55, 55, 5…
## $ speed <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ engine <chr> "Turbo-fan", "Turbo-fan", "Turbo-fan", "Turbo-fan", "Turb…
There is no relationship between a planes age and its delays.
planes %>%
group_by(year) %>%
summarise(count = n())
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(nycflights13)
anti_join(flights, airports, by = c("dest" = "faa"))
anti_join(airports, flights, by = c("faa" = "dest"))
glimpse(weather)
glimpse(planes)
planes %>%
group_by(year) %>%
summarise(count = n())