library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
url <- "https://raw.githubusercontent.com/Yedzinovich/Data-607/main/assignment-4.csv"
flights <- read_csv(url, show_col_types = FALSE)
print(flights)
## # A tibble: 4 × 7
## Airline Status `Los Angeles` Phoenix `San Diego` `San Francisco` Seattle
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Alaska On Time 497 221 212 503 1841
## 2 Alaska Delay 62 12 20 102 305
## 3 AM West On Time 694 4840 383 320 201
## 4 AM West Delay 117 415 65 129 61
kable(flights)
Alaska |
On Time |
497 |
221 |
212 |
503 |
1841 |
Alaska |
Delay |
62 |
12 |
20 |
102 |
305 |
AM West |
On Time |
694 |
4840 |
383 |
320 |
201 |
AM West |
Delay |
117 |
415 |
65 |
129 |
61 |
flights_long <- flights %>%
pivot_longer(cols = starts_with("Los Angeles"):starts_with("Seattle"), names_to = "City", values_to = "Flights")
print(flights_long)
## # A tibble: 20 × 4
## Airline Status City Flights
## <chr> <chr> <chr> <dbl>
## 1 Alaska On Time Los Angeles 497
## 2 Alaska On Time Phoenix 221
## 3 Alaska On Time San Diego 212
## 4 Alaska On Time San Francisco 503
## 5 Alaska On Time Seattle 1841
## 6 Alaska Delay Los Angeles 62
## 7 Alaska Delay Phoenix 12
## 8 Alaska Delay San Diego 20
## 9 Alaska Delay San Francisco 102
## 10 Alaska Delay Seattle 305
## 11 AM West On Time Los Angeles 694
## 12 AM West On Time Phoenix 4840
## 13 AM West On Time San Diego 383
## 14 AM West On Time San Francisco 320
## 15 AM West On Time Seattle 201
## 16 AM West Delay Los Angeles 117
## 17 AM West Delay Phoenix 415
## 18 AM West Delay San Diego 65
## 19 AM West Delay San Francisco 129
## 20 AM West Delay Seattle 61
kable(flights_long)
Alaska |
On Time |
Los Angeles |
497 |
Alaska |
On Time |
Phoenix |
221 |
Alaska |
On Time |
San Diego |
212 |
Alaska |
On Time |
San Francisco |
503 |
Alaska |
On Time |
Seattle |
1841 |
Alaska |
Delay |
Los Angeles |
62 |
Alaska |
Delay |
Phoenix |
12 |
Alaska |
Delay |
San Diego |
20 |
Alaska |
Delay |
San Francisco |
102 |
Alaska |
Delay |
Seattle |
305 |
AM West |
On Time |
Los Angeles |
694 |
AM West |
On Time |
Phoenix |
4840 |
AM West |
On Time |
San Diego |
383 |
AM West |
On Time |
San Francisco |
320 |
AM West |
On Time |
Seattle |
201 |
AM West |
Delay |
Los Angeles |
117 |
AM West |
Delay |
Phoenix |
415 |
AM West |
Delay |
San Diego |
65 |
AM West |
Delay |
San Francisco |
129 |
AM West |
Delay |
Seattle |
61 |
flights_final_transformation = flights_long %>% pivot_wider(names_from = Status, values_from = Flights)
print(flights_final_transformation)
## # A tibble: 10 × 4
## Airline City `On Time` Delay
## <chr> <chr> <dbl> <dbl>
## 1 Alaska Los Angeles 497 62
## 2 Alaska Phoenix 221 12
## 3 Alaska San Diego 212 20
## 4 Alaska San Francisco 503 102
## 5 Alaska Seattle 1841 305
## 6 AM West Los Angeles 694 117
## 7 AM West Phoenix 4840 415
## 8 AM West San Diego 383 65
## 9 AM West San Francisco 320 129
## 10 AM West Seattle 201 61
kable(flights_final_transformation)
Alaska |
Los Angeles |
497 |
62 |
Alaska |
Phoenix |
221 |
12 |
Alaska |
San Diego |
212 |
20 |
Alaska |
San Francisco |
503 |
102 |
Alaska |
Seattle |
1841 |
305 |
AM West |
Los Angeles |
694 |
117 |
AM West |
Phoenix |
4840 |
415 |
AM West |
San Diego |
383 |
65 |
AM West |
San Francisco |
320 |
129 |
AM West |
Seattle |
201 |
61 |
flights_summary <- flights_final_transformation %>%
mutate(Delay = as.numeric(Delay)) %>%
group_by(Airline) %>%
summarize(Total_Delays = sum(Delay, na.rm = TRUE), Total_On_Time = sum(`On Time`, na.rm = TRUE)) %>%
mutate(Total_Flights = Total_Delays + Total_On_Time,
Delay_Percentage = (Total_Delays / Total_Flights) * 100,
On_Time_Percentage = (Total_On_Time / Total_Flights) * 100)
print(flights_summary)
## # A tibble: 2 × 6
## Airline Total_Delays Total_On_Time Total_Flights Delay_Percentage
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AM West 787 6438 7225 10.9
## 2 Alaska 501 3274 3775 13.3
## # ℹ 1 more variable: On_Time_Percentage <dbl>
kable(flights_summary)
AM West |
787 |
6438 |
7225 |
10.89273 |
89.10727 |
Alaska |
501 |
3274 |
3775 |
13.27152 |
86.72848 |
Conclusion
- AM West has a higher total number of flights and a slightly better
on-time performance compared to Alaska.
- Alaska has a higher percentage of delays compared to AM West.
Based on the provided information, AM West seems to be the better
choice for me the following reasons:
- Higher Total Number of Flights which means that AM West has a more
extensive network and more frequent flights.
- Better On-Time Performance means that AM West is more reliable.
- Lower Percentage of Delays since Alaska tends to have a higher
percentage of delays compare to AM West, so flying with AM West reduces
the possibility of having delays.