# Daily weather vs. departure-delay analysis on the nycflights13 data.
#
# Steps:
#   1. Average wind speed, wind gust and precipitation per calendar day.
#   2. Report the day(s) with the highest value of each daily average.
#   3. Average departure delay per day, join it to the daily weather, and
#      print the correlation matrix of the four daily averages.

# Setup ----

# Install only the packages that are missing, rather than reinstalling
# everything each time the script runs.
required_pkgs <- c(
  "nycflights13",
  "dplyr",
  "ggplot2" # optional for plotting
)
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(nycflights13)
library(dplyr)

# Daily weather averages ----

# Keep only the columns used below. Missing values are handled per
# measurement in the summaries instead of calling na.omit() on the whole
# table: na.omit() drops a row when ANY column is NA, including columns this
# analysis never reads, so a single missing reading (e.g. a gust value)
# would also discard that row's valid wind-speed and precipitation values.
clean_weather <- weather %>%
  select(year, month, day, wind_speed, wind_gust, precip)

# One row per calendar day. A day with no recorded value for a measurement
# gets NaN for that average (mean() of an empty vector).
daily_weather_avg <- clean_weather %>%
  group_by(year, month, day) %>%
  summarise(
    avg_wind_speed = mean(wind_speed, na.rm = TRUE),
    avg_wind_gust = mean(wind_gust, na.rm = TRUE),
    avg_precip = mean(precip, na.rm = TRUE),
    .groups = "drop"
  )

# Highest values ----

# max(..., na.rm = TRUE) skips NaN/NA daily averages; rows whose comparison
# evaluates to NA are dropped by filter(). Ties are all kept.
max_wind_day <- daily_weather_avg %>%
  filter(avg_wind_speed == max(avg_wind_speed, na.rm = TRUE))

max_gust_day <- daily_weather_avg %>%
  filter(avg_wind_gust == max(avg_wind_gust, na.rm = TRUE))

max_precip_day <- daily_weather_avg %>%
  filter(avg_precip == max(avg_precip, na.rm = TRUE))

print("Day with highest average wind speed:")
print(max_wind_day)

print("Day with highest average wind gust:")
print(max_gust_day)

print("Day with highest average precipitation:")
print(max_precip_day)

# Departure delay ----

# Mean departure delay per calendar day; flights with a missing dep_delay
# are ignored.
daily_delay <- flights %>%
  group_by(year, month, day) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop"
  )

# Keep only days present in both daily tables.
weather_delay_merged <- daily_weather_avg %>%
  inner_join(daily_delay, by = c("year", "month", "day"))

# Correlation between weather and delay ----

# "complete.obs" restricts the correlation to days where all four daily
# averages are available.
cor_matrix <- weather_delay_merged %>%
  select(avg_wind_speed, avg_wind_gust, avg_precip, avg_dep_delay) %>%
  cor(use = "complete.obs")

print("Correlation matrix between weather conditions and average departure delay:")
print(cor_matrix)