# Prep system ----

# Install only the packages that are not already available, instead of
# reinstalling everything on each run of the script.
pkgs <- c(
  "nycflights13",
  "dplyr",
  "ggplot2" # optional for plotting
)
missing_pkgs <- pkgs[
  !vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(nycflights13)
library(dplyr)

# 1. Clean the weather data ----

# na.omit() removes every row that has an NA in *any* column, not only in the
# columns averaged later on.
# NOTE(review): if some columns are sparsely populated this may discard far
# more rows than intended -- confirm this is the desired cleaning rule.
clean_weather <- weather %>%
  na.omit()

# 2. Daily weather averages ----

# One row per (year, month, day) with the mean wind speed, wind gust and
# precipitation of the cleaned weather observations for that day.
daily_weather_avg <- clean_weather %>%
  group_by(year, month, day) %>%
  summarise(
    avg_wind_speed = mean(wind_speed),
    avg_wind_gust = mean(wind_gust),
    avg_precip = mean(precip),
    .groups = "drop"
  )

# Highest values
# Each filter keeps every day whose average equals the overall maximum, so
# tied days are all returned.
max_wind_day <- daily_weather_avg %>%
  filter(avg_wind_speed == max(avg_wind_speed))

max_gust_day <- daily_weather_avg %>%
  filter(avg_wind_gust == max(avg_wind_gust))

max_precip_day <- daily_weather_avg %>%
  filter(avg_precip == max(avg_precip))

print("Day with highest average wind speed:")
print(max_wind_day)

print("Day with highest average wind gust:")
print(max_gust_day)

print("Day with highest average precipitation:")
print(max_precip_day)

# 3. Weather vs. departure delay ----

# Departure delay
# Mean departure delay per (year, month, day); na.rm = TRUE skips flights
# whose dep_delay is missing.
daily_delay <- flights %>%
  group_by(year, month, day) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop"
  )

# Combine weather and delay data
# inner_join keeps only the days present in both daily tables.
weather_delay_merged <- daily_weather_avg %>%
  inner_join(daily_delay, by = c("year", "month", "day"))

# Check
# Pairwise correlations of the daily weather averages and the daily average
# departure delay; use = "complete.obs" drops rows containing any NA.
cor_matrix <- weather_delay_merged %>%
  select(avg_wind_speed, avg_wind_gust, avg_precip, avg_dep_delay) %>%
  cor(use = "complete.obs")

print("Correlation matrix between weather conditions and average departure delay:")
print(cor_matrix)