library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
url <- "https://raw.githubusercontent.com/j-song-npc/607-Week-5-Assignment/refs/heads/main/607%20Assignment%205.csv"
Flights <- read.csv(url, stringsAsFactors = FALSE)
colnames(Flights) <- c("Airline", "Status", "Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle")
Flights <- Flights [-3,]
Flights[2, "Airline"] <- "ALASKA"
Flights[4, "Airline"] <- "AM WEST"
Flights <- Flights %>%
mutate(across(c("Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle"), as.character))
rownames(Flights) <- NULL
Flights_long <- Flights %>%
pivot_longer(cols = starts_with("Los Angeles"):starts_with("Seattle"),
names_to = "City",
values_to = "Flight Count")
Flights_long <- Flights_long %>%
mutate(`Flight Count` = as.numeric(`Flight Count`))
overall_delayed_rate <- Flights_long %>%
group_by(Airline) %>%
summarize(
total_flights = sum(`Flight Count`),
delayed_flights = sum(`Flight Count`[Status == "delayed"],
na.rm = TRUE)) %>%
mutate(rate = (delayed_flights / total_flights) * 100)
View(overall_delayed_rate)
city_delay_rate <- Flights_long %>%
group_by(Airline, City) %>%
summarize(
total_flights = sum(`Flight Count`),
delayed_flights = sum(`Flight Count`[Status == "delayed"],
na.rm = TRUE)) %>%
mutate(delay_rate = (delayed_flights / total_flights) * 100)
## `summarise()` has grouped output by 'Airline'. You can override using the
## `.groups` argument.
View(city_delay_rate)
Both Alaska and AM West flights have similar rates of delays (13% and 11%, respectively).
Interestingly, AM West has higher rates of delays across every city but this is the opposite when viewing overall numbers. This discrepancy is likely due to the large variances in flights numbers.