library(nycflights13)
library(dplyr)
library(ggplot2)
library(lubridate)
How Weather Drives Flight Delays in NYC
1 Audience
This visualization is intended for airline operations teams and airport planners who want to understand which weather conditions and times of day influence flight delays in New York City, in order to improve scheduling and reduce disruptions.
2 Background and Goal
Flight delays are a persistent issue in major metropolitan areas like New York City, where high traffic and variable weather conditions create complex operational challenges. While delays are often attributed to weather, the extent to which specific weather conditions and time of day contribute to delays is not always clear.
Main Objective: The objective of this analysis is to determine how weather conditions (visibility, wind speed, and precipitation) and time of day influence flight delays in NYC, and to identify when delays are most severe.
3 Data and Assumptions
3.1 Data
The analysis uses the nycflights13 datasets:
- flights
- weather
- airports
3.2 Assumptions
- Flights can be matched to weather using origin and time_hour.
- Missing delay values are excluded when analyzing delay outcomes.
- Hourly weather measurements are a reasonable approximation of the conditions affecting flights during each time period.
4 Data Preparation
# Build the analysis table: one row per flight with both delay values
# recorded, joined to the hourly weather observation for the same origin
# airport and time_hour. Columns that exist in both tables (year, month,
# day, hour) come out of the join with .x (flights) and .y (weather)
# suffixes.
flights_weather <- flights |>
  # Drop flights that are missing either delay measurement.
  filter(!(is.na(dep_delay) | is.na(arr_delay))) |>
  select(
    year, month, day, hour,
    origin, dest,
    dep_delay, arr_delay,
    time_hour, distance
  ) |>
  # Left join: flights without a matching weather row are kept, with NA
  # weather columns.
  left_join(weather, by = c("origin", "time_hour"))
summary(flights_weather)
year.x month.x day.x hour.x
Min. :2013 Min. : 1.000 Min. : 1.00 Min. : 5.00
1st Qu.:2013 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.: 9.00
Median :2013 Median : 7.000 Median :16.00 Median :13.00
Mean :2013 Mean : 6.565 Mean :15.74 Mean :13.14
3rd Qu.:2013 3rd Qu.:10.000 3rd Qu.:23.00 3rd Qu.:17.00
Max. :2013 Max. :12.000 Max. :31.00 Max. :23.00
origin dest dep_delay arr_delay
Length:327346 Length:327346 Min. : -43.00 Min. : -86.000
Class :character Class :character 1st Qu.: -5.00 1st Qu.: -17.000
Mode :character Mode :character Median : -2.00 Median : -5.000
Mean : 12.56 Mean : 6.895
3rd Qu.: 11.00 3rd Qu.: 14.000
Max. :1301.00 Max. :1272.000
time_hour distance year.y month.y
Min. :2013-01-01 05:00:00 Min. : 80 Min. :2013 Min. : 1.000
1st Qu.:2013-04-05 06:00:00 1st Qu.: 509 1st Qu.:2013 1st Qu.: 4.000
Median :2013-07-04 09:00:00 Median : 888 Median :2013 Median : 7.000
Mean :2013-07-03 17:56:45 Mean :1048 Mean :2013 Mean : 6.547
3rd Qu.:2013-10-01 18:00:00 3rd Qu.:1389 3rd Qu.:2013 3rd Qu.: 9.000
Max. :2013-12-31 23:00:00 Max. :4983 Max. :2013 Max. :12.000
NA's :1527 NA's :1527
day.y hour.y temp dewp
Min. : 1.0 Min. : 5.00 Min. : 10.94 Min. :-9.94
1st Qu.: 8.0 1st Qu.: 9.00 1st Qu.: 42.08 1st Qu.:26.06
Median :16.0 Median :13.00 Median : 57.20 Median :42.80
Mean :15.7 Mean :13.13 Mean : 57.01 Mean :41.50
3rd Qu.:23.0 3rd Qu.:17.00 3rd Qu.: 71.96 3rd Qu.:57.92
Max. :31.0 Max. :23.00 Max. :100.04 Max. :78.08
NA's :1527 NA's :1527 NA's :1544 NA's :1544
humid wind_dir wind_speed wind_gust
Min. : 12.74 Min. : 0.0 Min. : 0.000 Min. :16.11
1st Qu.: 43.74 1st Qu.:130.0 1st Qu.: 6.905 1st Qu.:20.71
Median : 57.22 Median :220.0 Median :10.357 Median :24.17
Mean : 59.21 Mean :201.9 Mean :11.060 Mean :25.15
3rd Qu.: 74.67 3rd Qu.:290.0 3rd Qu.:14.960 3rd Qu.:27.62
Max. :100.00 Max. :360.0 Max. :42.579 Max. :66.75
NA's :1544 NA's :9574 NA's :1605 NA's :249912
precip pressure visib
Min. :0.00000 Min. : 983.8 Min. : 0.00
1st Qu.:0.00000 1st Qu.:1012.9 1st Qu.:10.00
Median :0.00000 Median :1017.6 Median :10.00
Mean :0.00421 Mean :1017.9 Mean : 9.29
3rd Qu.:0.00000 3rd Qu.:1022.9 3rd Qu.:10.00
Max. :1.21000 Max. :1042.1 Max. :10.00
NA's :1527 NA's :36142 NA's :1527
5 Initial Exploratory Data Analysis
5.1 Overall Delay Patterns
# Histogram of departure delay (minutes) over every flight retained in
# flights_weather, using 50 bins.
flights_weather |>
  ggplot(aes(x = dep_delay)) +
  geom_histogram(bins = 50) +
  labs(
    title = "Distribution of Departure Delays",
    x = "Departure Delay (minutes)",
    y = "Number of Flights"
  )
5.2 Departure Delay vs Arrival Delay
# Scatter plot of arrival delay against departure delay, one point per
# flight. Points are drawn semi-transparent (alpha = 0.2) so that dense
# regions remain readable; a smoothed trend line is overlaid without a
# confidence band (se = FALSE).
flights_weather |>
  ggplot(aes(x = dep_delay, y = arr_delay)) +
  geom_point(alpha = 0.2) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Relation Between Departure and Arrival Delays",
    x = "Departure Delay (minutes)",
    y = "Arrival Delay (minutes)"
  )
6 Time of Day Analysis
6.1 Average Departure Delay by Hour
# Mean departure delay for each hour of the day, drawn as a line with
# point markers.
flights_weather |>
  # hour.x is the hour column carried over from flights; the .x suffix is
  # added by the join because weather has an hour column as well.
  group_by(flight_hour = hour.x) |>
  summarise(avg_dep_delay = mean(dep_delay, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(x = flight_hour, y = avg_dep_delay)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Average Departure Delay by Time of Day",
    x = "Hour of Day",
    y = "Average Departure Delay (minutes)"
  )
7 Weather-Based Analysis
7.1 Visibility and Delays
# Departure delay against visibility, one semi-transparent point per
# flight, with a smoothed trend line (no confidence band).
flights_weather |>
  # The left join leaves visib missing for flights without a weather
  # record; drop those rows explicitly rather than relying on ggplot2
  # removing them with a warning.
  filter(!is.na(visib)) |>
  ggplot(aes(x = visib, y = dep_delay)) +
  geom_point(alpha = 0.15) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Visibility vs Departure Delay",
    x = "Visibility (miles)",
    y = "Departure Delay (minutes)"
  )
7.2 Wind Speed and Delays
# Departure delay against wind speed, one semi-transparent point per
# flight, with a smoothed trend line (no confidence band).
flights_weather |>
  # wind_speed is missing for flights without a usable weather record;
  # drop those rows explicitly rather than relying on ggplot2 removing
  # them with a warning.
  filter(!is.na(wind_speed)) |>
  ggplot(aes(x = wind_speed, y = dep_delay)) +
  geom_point(alpha = 0.15) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Wind Speed vs Departure Delay",
    # nycflights13 documents wind_speed in mph; state the unit, as the
    # visibility plot does.
    x = "Wind Speed (mph)",
    y = "Departure Delay (minutes)"
  )
7.3 Precipitation and Delays
# Departure delay against hourly precipitation, one semi-transparent point
# per flight, with a smoothed trend line (no confidence band).
flights_weather |>
  # precip is missing for flights without a weather record; drop those
  # rows explicitly rather than relying on ggplot2 removing them with a
  # warning.
  filter(!is.na(precip)) |>
  ggplot(aes(x = precip, y = dep_delay)) +
  geom_point(alpha = 0.15) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Precipitation vs Departure Delay",
    # nycflights13 documents precip in inches; state the unit, as the
    # visibility plot does.
    x = "Precipitation (inches)",
    y = "Departure Delay (minutes)"
  )