# Month names in calendar order. Base R's built-in `month.name` constant
# already holds "January" through "December", so there is no need to retype it.
month_names <- month.name

# Convert the numeric month to its name by indexing the vector with `month`.
# NOTE(review): assumes `month` holds integers 1-12 -- confirm upstream.
flights_months_name <- flights_month |>
  mutate(month_name = month_names[month])

# View the updated dataset with the new month_name column
head(flights_months_name)
Use the dplyr function `group_by()` to group the data frame by month, then compute the total number of flights and the corresponding average departure delay for each month.
# Collapse the flights to one row per month: the number of flights and the
# mean departure delay (missing delays are ignored), then put the rows in
# calendar order instead of the alphabetical order produced by grouping.
flight_monthly_summary <- flights_months_name |>
  group_by(month_name) |>
  summarise(
    total_flights = n(),
    average_delay = mean(dep_delay, na.rm = TRUE)
  ) |>
  arrange(match(month_name, month_names))

# Display the summarized data
print(flight_monthly_summary)
# A tibble: 12 × 3
month_name total_flights average_delay
<chr> <int> <dbl>
1 January 36020 14.0
2 February 34761 11.0
3 March 39514 13.0
4 April 37476 17.7
5 May 38710 8.39
6 June 35921 24.4
7 July 36211 30.5
8 August 36765 13.5
9 September 35505 17.3
10 October 36586 5.28
11 November 34521 4.40
12 December 33362 8.33
Plot a dual-axis plot of Total flights and Average Departure Delay per Month.
# Create a dual-axis plot

# Order the months chronologically; as a plain character column the x axis
# would be sorted alphabetically. `month.name` is base R's January-December
# constant.
flight_monthly_summary$month_name <- factor(
  flight_monthly_summary$month_name,
  levels = month.name
)

# ggplot2 has a single real y scale, so the delay series is multiplied by this
# factor to share the flights axis, and the secondary axis divides by the same
# factor to label it in minutes again. Keeping it in one place guarantees the
# two transformations stay inverse to each other.
delay_scale <- 1000

ggplot(flight_monthly_summary, aes(x = month_name)) +
  # group = 1 joins the points into one line across the discrete x axis.
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0.
  geom_line(
    aes(y = total_flights, group = 1, color = "Total Flights"),
    linewidth = 1
  ) +
  geom_line(
    aes(y = average_delay * delay_scale, group = 1, color = "Average Delay"),
    linewidth = 1
  ) +
  scale_y_continuous(
    name = "Total Flights",
    sec.axis = sec_axis(~ . / delay_scale, name = "Average Delay (minutes)")
  ) +
  labs(
    title = "Total Flights and Average Delay per Month",
    x = "Month",
    caption = "FAA Aircraft Registry"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_manual(
    values = c("Total Flights" = "#6C15F7", "Average Delay" = "#F71515")
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Summary of Visualization.
The visualization is a dual-axis line plot showing the total number of flights out of NYC airports and the average departure delay for each month in 2023. The plot captures the seasonal fluctuations in both metrics, providing insight into the operational dynamics of NYC airports. Flight volume is fairly steady across the year, ranging from about 33,400 flights in December to a peak of about 39,500 in March, with no pronounced surge during the summer months. Average delay varies far more: it peaks in June and July (24.4 and 30.5 minutes), which coincides with the peak vacation season, and is lowest in October and November (5.28 and 4.40 minutes). The winter months (December through February) show only moderate delays of roughly 8 to 14 minutes, so adverse winter weather such as black ice and snow does not appear to be the main driver of delays in this data. This visualization highlights the importance of seasonal considerations in airport management and shows that delays are not explained by flight volume alone, since the busiest month (March) is not the most delayed.