Answer: The .CSV file created will be attached to the assignment file
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
## Load Data into R
Flight_ArrDelay <- read.csv(file="https://raw.githubusercontent.com/Badigun/Data-607-Assignments/refs/heads/main/Flights%20Arrival%3Adelays%20DATA%20607.csv", header = TRUE, sep = ",")
## View the loaded data to make sure it's loaded correctly
head (Flight_ArrDelay)
## X X.1 X.2 X.3
## 1 Destination Alaska (On Time) Alaska (Delayed) AM West (On Time)
## 2 Los Angeles 497 62 694
## 3 Phoenix 221 12 4840
## 4 San Diego 212 20 383
## 5 San Francisco 503 102 320
## 6 Seattle 1841 305 201
## X.4 X.5 X.6
## 1 AM West (Delayed) NA NA
## 2 117 NA NA
## 3 415 NA NA
## 4 65 NA NA
## 5 129 NA NA
## 6 61 NA NA
# Create the dataset Manually
FlightArrDelay <- tibble(
Destination = c("Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle"),
`Alaska (On Time)` = c(497, 221, 212, 503, 1841),
`Alaska (Delayed)` = c(62, 12, 20, 102, 305),
`AM West (On Time)` = c(694, 4840, 383, 320, 201),
`AM West (Delayed)` = c(117, 415, 65, 129, 61)
)
# View the data
head(FlightArrDelay)
## # A tibble: 5 × 5
## Destination `Alaska (On Time)` `Alaska (Delayed)` `AM West (On Time)`
## <chr> <dbl> <dbl> <dbl>
## 1 Los Angeles 497 62 694
## 2 Phoenix 221 12 4840
## 3 San Diego 212 20 383
## 4 San Francisco 503 102 320
## 5 Seattle 1841 305 201
## # ℹ 1 more variable: `AM West (Delayed)` <dbl>
# (tidyr) Pivot the data to a long format for easier analysis
longer_FlightArrDelay <- FlightArrDelay %>%
pivot_longer(
cols = c(`Alaska (On Time)`, `Alaska (Delayed)`, `AM West (On Time)`, `AM West (Delayed)`),
names_to = c("Airline", "Status"),
names_pattern = "(.*) \\((.*)\\)",
values_to = "Flights"
)
# View the tidied data
head(longer_FlightArrDelay)
## # A tibble: 6 × 4
## Destination Airline Status Flights
## <chr> <chr> <chr> <dbl>
## 1 Los Angeles Alaska On Time 497
## 2 Los Angeles Alaska Delayed 62
## 3 Los Angeles AM West On Time 694
## 4 Los Angeles AM West Delayed 117
## 5 Phoenix Alaska On Time 221
## 6 Phoenix Alaska Delayed 12
# Filter the data for "On Time" flights
ontime_flights <- longer_FlightArrDelay %>%
filter(Status == "On Time")
# View the filtered data
head(ontime_flights)
## # A tibble: 6 × 4
## Destination Airline Status Flights
## <chr> <chr> <chr> <dbl>
## 1 Los Angeles Alaska On Time 497
## 2 Los Angeles AM West On Time 694
## 3 Phoenix Alaska On Time 221
## 4 Phoenix AM West On Time 4840
## 5 San Diego Alaska On Time 212
## 6 San Diego AM West On Time 383
# Filter the data for "Delayed" flights
Delayed_flights <- longer_FlightArrDelay %>%
filter(Status == "Delayed")
# View the filtered data
head(Delayed_flights)
## # A tibble: 6 × 4
## Destination Airline Status Flights
## <chr> <chr> <chr> <dbl>
## 1 Los Angeles Alaska Delayed 62
## 2 Los Angeles AM West Delayed 117
## 3 Phoenix Alaska Delayed 12
## 4 Phoenix AM West Delayed 415
## 5 San Diego Alaska Delayed 20
## 6 San Diego AM West Delayed 65
# View the total flights for each airline and status
totalflights <- longer_FlightArrDelay %>%
group_by(Airline, Status) %>%
summarise(Total_Flights = sum(Flights)) %>%
ungroup()
## `summarise()` has grouped output by 'Airline'. You can override using the
## `.groups` argument.
print(totalflights)
## # A tibble: 4 × 3
## Airline Status Total_Flights
## <chr> <chr> <dbl>
## 1 AM West Delayed 787
## 2 AM West On Time 6438
## 3 Alaska Delayed 501
## 4 Alaska On Time 3274
# Calculate the proportion of delayed flights for each airline
proportion_delayed <- longer_FlightArrDelay %>%
group_by(Airline) %>%
summarise(
Total_On_Time = sum(Flights[Status == "On Time"]),
Total_Delayed = sum(Flights[Status == "Delayed"]),
Total_Flights = sum(Flights),
Proportion_Delayed = Total_Delayed / Total_Flights
) %>%
ungroup()
print(proportion_delayed)
## # A tibble: 2 × 5
## Airline Total_On_Time Total_Delayed Total_Flights Proportion_Delayed
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AM West 6438 787 7225 0.109
## 2 Alaska 3274 501 3775 0.133
# Calculate the average number of delayed flights for each airline
avg_delayed_flights <- longer_FlightArrDelay %>%
filter(Status == "Delayed") %>%
group_by(Airline) %>%
summarise(Average_Delayed_Flights = mean(Flights)) %>%
ungroup()
print(avg_delayed_flights)
## # A tibble: 2 × 2
## Airline Average_Delayed_Flights
## <chr> <dbl>
## 1 AM West 157.
## 2 Alaska 100.
The analysis of arrival delays for Alaska Airlines and AM West Airlines revealed the following:
Proportion of Delayed Flights: Alaska Airlines has a higher proportion of delayed flights (13.3%) compared to AM West Airlines (10.9%).
Average Number of Delayed Flights: AM West Airlines experiences, on average, more delayed flights (157.4 per destination) than Alaska Airlines (100.2 per destination).