Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(dplyr)
library(ggplot2)
Clean and organize the data
flights_clean <- flights_data[!apply(flights_data, 1, function(x) all(is.na(x) | x == "")),]
flights_clean
## X X.1 Los.Angels Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA On Time 497 221 212 503 1,841
## 2 Delayed 62 12 20 102 305
## 4 AM West On Time 694 4,840 383 320 201
## 5 Delayed 117 415 65 129 61
flights_clean$X[2] <- "ALASKA"
flights_clean$X[4] <- "AM West"
flights_clean
## X X.1 Los.Angels Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA On Time 497 221 212 503 1,841
## 2 ALASKA Delayed 62 12 20 102 305
## 4 AM West On Time 694 4,840 383 320 201
## 5 AM West Delayed 117 415 65 129 61
colnames(flights_clean)[1] <- "Airline"
colnames(flights_clean)[2] <- "Status"
colnames(flights_clean)
## [1] "Airline" "Status" "Los.Angels" "Phoenix"
## [5] "San.Diego" "San.Francisco" "Seattle"
flights_clean
## Airline Status Los.Angels Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA On Time 497 221 212 503 1,841
## 2 ALASKA Delayed 62 12 20 102 305
## 4 AM West On Time 694 4,840 383 320 201
## 5 AM West Delayed 117 415 65 129 61
Filter for delayed flights and compare delayed flights from both
airlines
delayed_flights <- flights_reshape %>%
filter(Status == "Delayed")
summary_delays <- delayed_flights %>%
group_by(City, Airline) %>%
summarise(Total_Delays = sum(Flights),
.groups = 'drop')
summary_delays
## # A tibble: 10 × 3
## City Airline Total_Delays
## <chr> <chr> <dbl>
## 1 Los.Angels ALASKA 62
## 2 Los.Angels AM West 117
## 3 Phoenix ALASKA 12
## 4 Phoenix AM West 415
## 5 San.Diego ALASKA 20
## 6 San.Diego AM West 65
## 7 San.Francisco ALASKA 102
## 8 San.Francisco AM West 129
## 9 Seattle ALASKA 305
## 10 Seattle AM West 61
ggplot(data = summary_delays, aes(x = City, y = Total_Delays, fill = Airline)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = " Comparison of Delayed Airline by City",
x = "City",
y = "Total Delays") +
theme_minimal()

Filter on-time flights and compare on-time flights from both
airlines
ontime_flights <- flights_reshape %>%
filter(Status == "On Time")
summary_ontime <- ontime_flights %>%
group_by(City, Airline) %>%
summarise(Total_OnTime = sum(Flights),
.groups = 'drop')
summary_ontime
## # A tibble: 10 × 3
## City Airline Total_OnTime
## <chr> <chr> <dbl>
## 1 Los.Angels ALASKA 497
## 2 Los.Angels AM West 694
## 3 Phoenix ALASKA 221
## 4 Phoenix AM West 4840
## 5 San.Diego ALASKA 212
## 6 San.Diego AM West 383
## 7 San.Francisco ALASKA 503
## 8 San.Francisco AM West 320
## 9 Seattle ALASKA 1841
## 10 Seattle AM West 201
ggplot(data = summary_ontime, aes(x = City, y = Total_OnTime, fill = Airline)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = " Comparison of On-Time Airline by City",
x = "City",
y = "Total On-Time") +
theme_minimal()

Calculate the total and average delays for each airline across all
cities.
delay_analysis <- delayed_flights %>%
group_by(Airline) %>%
summarise(
Total_Delays = sum(Flights),
Avg_Delays = mean(Flights)
)
delay_analysis
## # A tibble: 2 × 3
## Airline Total_Delays Avg_Delays
## <chr> <dbl> <dbl>
## 1 ALASKA 501 100.
## 2 AM West 787 157.
Calculate the total and average on-time for each airline across all
cities.
ontime_analysis <- ontime_flights %>%
group_by(Airline) %>%
summarise(
Total_OnTime = sum(Flights),
Avg_OnTime = mean(Flights)
)
ontime_analysis
## # A tibble: 2 × 3
## Airline Total_OnTime Avg_OnTime
## <chr> <dbl> <dbl>
## 1 ALASKA 3274 655.
## 2 AM West 6438 1288.
Data showing total delayed & on-time percentage and average
delayed & on-time percentage
total_flights <- flights_reshape %>%
group_by(Airline, City) %>%
summarize(Total_Flights = sum (Flights),
.groups = 'drop')
total_flights
## # A tibble: 10 × 3
## Airline City Total_Flights
## <chr> <chr> <dbl>
## 1 ALASKA Los.Angels 559
## 2 ALASKA Phoenix 233
## 3 ALASKA San.Diego 232
## 4 ALASKA San.Francisco 605
## 5 ALASKA Seattle 2146
## 6 AM West Los.Angels 811
## 7 AM West Phoenix 5255
## 8 AM West San.Diego 448
## 9 AM West San.Francisco 449
## 10 AM West Seattle 262
flight_percentage <- flights_reshape %>%
left_join(total_flights, by = c("City", "Airline")) %>%
mutate(Percentage = (Flights / Total_Flights) * 100)
flight_percentage
## # A tibble: 20 × 6
## Airline Status City Flights Total_Flights Percentage
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 ALASKA On Time Los.Angels 497 559 88.9
## 2 ALASKA On Time Phoenix 221 233 94.8
## 3 ALASKA On Time San.Diego 212 232 91.4
## 4 ALASKA On Time San.Francisco 503 605 83.1
## 5 ALASKA On Time Seattle 1841 2146 85.8
## 6 ALASKA Delayed Los.Angels 62 559 11.1
## 7 ALASKA Delayed Phoenix 12 233 5.15
## 8 ALASKA Delayed San.Diego 20 232 8.62
## 9 ALASKA Delayed San.Francisco 102 605 16.9
## 10 ALASKA Delayed Seattle 305 2146 14.2
## 11 AM West On Time Los.Angels 694 811 85.6
## 12 AM West On Time Phoenix 4840 5255 92.1
## 13 AM West On Time San.Diego 383 448 85.5
## 14 AM West On Time San.Francisco 320 449 71.3
## 15 AM West On Time Seattle 201 262 76.7
## 16 AM West Delayed Los.Angels 117 811 14.4
## 17 AM West Delayed Phoenix 415 5255 7.90
## 18 AM West Delayed San.Diego 65 448 14.5
## 19 AM West Delayed San.Francisco 129 449 28.7
## 20 AM West Delayed Seattle 61 262 23.3
# calculating the percentage of delayed and on-time performance for ALASKA Airline
alaska_flights <- flights_reshape %>%
filter(Airline == "ALASKA")
alaska_flights
## # A tibble: 10 × 4
## Airline Status City Flights
## <chr> <chr> <chr> <dbl>
## 1 ALASKA On Time Los.Angels 497
## 2 ALASKA On Time Phoenix 221
## 3 ALASKA On Time San.Diego 212
## 4 ALASKA On Time San.Francisco 503
## 5 ALASKA On Time Seattle 1841
## 6 ALASKA Delayed Los.Angels 62
## 7 ALASKA Delayed Phoenix 12
## 8 ALASKA Delayed San.Diego 20
## 9 ALASKA Delayed San.Francisco 102
## 10 ALASKA Delayed Seattle 305
alaska_delayed <- alaska_flights %>%
filter(Status == "Delayed") %>%
summarize(Delayed_Flights = sum(Flights))
alaska_delayed
## # A tibble: 1 × 1
## Delayed_Flights
## <dbl>
## 1 501
alaska_ontime <- alaska_flights %>%
filter(Status == "On Time") %>%
summarize(OnTime_Flights = sum(Flights))
alaska_ontime
## # A tibble: 1 × 1
## OnTime_Flights
## <dbl>
## 1 3274
alaska_total <- alaska_flights %>%
summarize(Total_Flights = sum(Flights))
alaska_total
## # A tibble: 1 × 1
## Total_Flights
## <dbl>
## 1 3775
alaska_delayed_percentage <- (alaska_delayed$Delayed_Flights / alaska_total$Total_Flights) * 100
alaska_delayed_percentage
## [1] 13.27152
alaska_ontime_percentage <- (alaska_ontime$OnTime_Flights / alaska_total$Total_Flights) * 100
alaska_ontime_percentage
## [1] 86.72848
# Calculating the percentage of delayed and on-time performance for AM West Airline
amwest_flights <- flights_reshape %>%
filter(Airline == "AM West")
amwest_flights
## # A tibble: 10 × 4
## Airline Status City Flights
## <chr> <chr> <chr> <dbl>
## 1 AM West On Time Los.Angels 694
## 2 AM West On Time Phoenix 4840
## 3 AM West On Time San.Diego 383
## 4 AM West On Time San.Francisco 320
## 5 AM West On Time Seattle 201
## 6 AM West Delayed Los.Angels 117
## 7 AM West Delayed Phoenix 415
## 8 AM West Delayed San.Diego 65
## 9 AM West Delayed San.Francisco 129
## 10 AM West Delayed Seattle 61
amwest_delayed <- amwest_flights %>%
filter(Status == "Delayed") %>%
summarize(Delayed_Flights = sum(Flights))
amwest_delayed
## # A tibble: 1 × 1
## Delayed_Flights
## <dbl>
## 1 787
amwest_ontime <- amwest_flights %>%
filter(Status == "On Time") %>%
summarize(OnTime_Flights = sum(Flights))
amwest_ontime
## # A tibble: 1 × 1
## OnTime_Flights
## <dbl>
## 1 6438
amwest_total <- amwest_flights %>%
summarize(Total_Flights = sum(Flights))
amwest_total
## # A tibble: 1 × 1
## Total_Flights
## <dbl>
## 1 7225
amwest_delayed_percentage <- (amwest_delayed$Delayed_Flights / amwest_total$Total_Flights) * 100
amwest_delayed_percentage
## [1] 10.89273
amwest_ontime_percentage <- (amwest_ontime$OnTime_Flights / amwest_total$Total_Flights) * 100
amwest_ontime_percentage
## [1] 89.10727
# Comparing both airlines performance
airlines_analysis <- data.frame(
Airline = c("ALASKA", "AM West"),
Total_Flights = c(alaska_total$Total_Flights, amwest_total$Total_Flights),
Delayed_Flights = c(alaska_delayed$Delayed_Flights, amwest_delayed$Delayed_Flights),
OnTime_Flights = c(alaska_ontime$OnTime_Flights, amwest_ontime$OnTime_Flights),
Delayed_Percentage = c(alaska_delayed_percentage, amwest_delayed_percentage),
OnTime_Percentage = c(alaska_ontime_percentage, amwest_ontime_percentage)
)
airlines_analysis
## Airline Total_Flights Delayed_Flights OnTime_Flights Delayed_Percentage
## 1 ALASKA 3775 501 3274 13.27152
## 2 AM West 7225 787 6438 10.89273
## OnTime_Percentage
## 1 86.72848
## 2 89.10727
In Conclusion
Based on the analysis of the flights data for ALASKA and AM West, we
can conclude both airlines show a good performance with over 86% of
their flights arriving on time. However, AM West Airline appears to have
less delays and higher on-time flights. Therefore, AM West offer a
slightly more reliable service for passengers who priortized on-time
performance.