library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
airline_data <- read.csv("~/Downloads/attachments (1)/airline_delays.csv")
head(airline_data)
## Airline Status Los_Angeles Phoenix San_Diego San_Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 65 129 61
tidy_airline_data <- airline_data %>%
pivot_longer(cols = Los_Angeles:Seattle,
names_to = "City",
values_to = "Count")
head(tidy_airline_data)
## # A tibble: 6 × 4
## Airline Status City Count
## <chr> <chr> <chr> <int>
## 1 ALASKA on time Los_Angeles 497
## 2 ALASKA on time Phoenix 221
## 3 ALASKA on time San_Diego 212
## 4 ALASKA on time San_Francisco 503
## 5 ALASKA on time Seattle 1841
## 6 ALASKA delayed Los_Angeles 62
rate_data <- tidy_airline_data %>%
group_by(Airline, City) %>%
summarize(Total_OnTime = sum(Count[Status == "on time"]),
Total_Delayed = sum(Count[Status == "delayed"])) %>%
mutate(Delay_Rate = Total_Delayed / (Total_OnTime + Total_Delayed),
OnTime_Rate = Total_OnTime / (Total_OnTime + Total_Delayed))
## `summarise()` has grouped output by 'Airline'. You can override using the
## `.groups` argument.
head(rate_data)
## # A tibble: 6 × 6
## # Groups: Airline [2]
## Airline City Total_OnTime Total_Delayed Delay_Rate OnTime_Rate
## <chr> <chr> <int> <int> <dbl> <dbl>
## 1 ALASKA Los_Angeles 497 62 0.111 0.889
## 2 ALASKA Phoenix 221 12 0.0515 0.948
## 3 ALASKA San_Diego 212 20 0.0862 0.914
## 4 ALASKA San_Francisco 503 102 0.169 0.831
## 5 ALASKA Seattle 1841 305 0.142 0.858
## 6 AM WEST Los_Angeles 694 117 0.144 0.856
library(ggplot2)
long_data <- airline_data %>%
pivot_longer(cols = Los_Angeles:Seattle, names_to = "City", values_to = "Count")
ggplot(long_data, aes(x = City, y = Count, fill = Status)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Airline) +
labs(title = "Flight Delays and On-Time Arrivals Across Cities",
x = "City", y = "Number of Flights") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
