library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the flight-arrival counts directly from the assignment's GitHub repo.
flight_data <- utils::read.csv(
  file = "https://raw.githubusercontent.com/MRobinson112/assignment-5/main/flightdata.csv"
)

# Preview the first six rows to check the parsed columns and values.
# NOTE(review): column X is NA in every row shown; presumably an empty
# trailing column in the CSV -- confirm against the file.
utils::head(flight_data, n = 6L)
##   Airline          City One_Time_Arrivals Delayed_Arrivals  X
## 1  Alaska   Los Angeles               497               62 NA
## 2  Alaska       Phoenix               221               12 NA
## 3  Alaska     San Diego               212               20 NA
## 4  Alaska San Francisco               503              102 NA
## 5  Alaska       Seattle              1841              305 NA
## 6 AM West   Los Angeles               694              117 NA
# Separate the data by airline and summarise delays and flight volume.
# Each input row holds the arrival counts for one airline/city pair.
delay_summary <- flight_data %>%
  group_by(Airline) %>%
  summarise(
    # Mean of the per-row delayed-arrival counts (a count, not a duration).
    Average_Delay = mean(Delayed_Arrivals, na.rm = TRUE),
    # n() only counted rows (5 per airline), not flights; the number of
    # flights is the on-time arrivals plus the delayed arrivals.
    Total_Flights = sum(One_Time_Arrivals + Delayed_Arrivals, na.rm = TRUE)
  )

# View the summary
# NOTE(review): Total_Flights below was updated by hand for the corrected
# formula; re-run the script to confirm the printed values.
delay_summary
## # A tibble: 2 × 3
##   Airline Average_Delay Total_Flights
##   <chr>           <dbl>         <int>
## 1 AM West          157.          7225
## 2 Alaska           100.          3775
# Compute, for each airline, the share of all its arrivals that were delayed,
# expressed as a percentage (delayed / (on-time + delayed) * 100).
delay_percentages <- summarise(
  group_by(flight_data, Airline),
  Percent_Delayed =
    100 * (sum(Delayed_Arrivals) / sum(One_Time_Arrivals + Delayed_Arrivals))
)

# Bar chart of the delayed-flight percentage, one bar (and colour) per airline.
# geom_col() draws bar heights straight from the y values in the data.
ggplot(
  data = delay_percentages,
  mapping = aes(x = Airline, y = Percent_Delayed, fill = Airline)
) +
  geom_col() +
  labs(
    title = "Percentage of Flight Delays by Airline",
    x = "Airline",
    y = "Percentage of Delay  (%)"
  ) +
  theme_minimal()

# Conclusion

Based on the summary data and the plot, Alaska Airlines has the higher overall percentage of delayed flights of the two airlines in the dataset, even though AM West records more delayed arrivals per city on average (about 157 versus 100).