library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
airline_data <- read.csv("~/Downloads/attachments (1)/airline_delays.csv")
head(airline_data)
##   Airline  Status Los_Angeles Phoenix San_Diego San_Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2  ALASKA delayed          62      12        20           102     305
## 3 AM WEST on time         694    4840       383           320     201
## 4 AM WEST delayed         117     415        65           129      61
tidy_airline_data <- airline_data %>%
  pivot_longer(cols = Los_Angeles:Seattle, 
               names_to = "City", 
               values_to = "Count")
head(tidy_airline_data)
## # A tibble: 6 × 4
##   Airline Status  City          Count
##   <chr>   <chr>   <chr>         <int>
## 1 ALASKA  on time Los_Angeles     497
## 2 ALASKA  on time Phoenix         221
## 3 ALASKA  on time San_Diego       212
## 4 ALASKA  on time San_Francisco   503
## 5 ALASKA  on time Seattle        1841
## 6 ALASKA  delayed Los_Angeles      62
rate_data <- tidy_airline_data %>%
  group_by(Airline, City) %>%
  summarize(Total_OnTime = sum(Count[Status == "on time"]),
            Total_Delayed = sum(Count[Status == "delayed"])) %>%
  mutate(Delay_Rate = Total_Delayed / (Total_OnTime + Total_Delayed),
         OnTime_Rate = Total_OnTime / (Total_OnTime + Total_Delayed))
## `summarise()` has grouped output by 'Airline'. You can override using the
## `.groups` argument.
head(rate_data)
## # A tibble: 6 × 6
## # Groups:   Airline [2]
##   Airline City          Total_OnTime Total_Delayed Delay_Rate OnTime_Rate
##   <chr>   <chr>                <int>         <int>      <dbl>       <dbl>
## 1 ALASKA  Los_Angeles            497            62     0.111        0.889
## 2 ALASKA  Phoenix                221            12     0.0515       0.948
## 3 ALASKA  San_Diego              212            20     0.0862       0.914
## 4 ALASKA  San_Francisco          503           102     0.169        0.831
## 5 ALASKA  Seattle               1841           305     0.142        0.858
## 6 AM WEST Los_Angeles            694           117     0.144        0.856
library(ggplot2)
long_data <- airline_data %>%
  pivot_longer(cols = Los_Angeles:Seattle, names_to = "City", values_to = "Count")
ggplot(long_data, aes(x = City, y = Count, fill = Status)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ Airline) +  
  labs(title = "Flight Delays and On-Time Arrivals Across Cities",
       x = "City", y = "Number of Flights") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))