untidy_data <- "https://raw.githubusercontent.com/Kingtilon1/DATA607/main/assignment5/flights.csv"
untidy <- data.frame(read.csv(untidy_data, header=TRUE, sep="\t"))
untidy
##         X     X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2         delayed          62      12        20           102     305
## 3                          NA      NA        NA            NA      NA
## 4 AM WEST on time         694    4840       383           320     201
## 5         delayed         117     415        65           129      61

I am going to give the table a name

colnames(untidy)[1] <- "Airline"
colnames(untidy)[2] <-"Tardiness"

I will now drop the row that has the na values

untidy <- drop_na(untidy)

fill the empty rows with NA

untidy[untidy==""] <-NA
tidy <- untidy %>% fill(Airline)
tidy
##   Airline Tardiness Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA   on time         497     221       212           503    1841
## 2  ALASKA   delayed          62      12        20           102     305
## 3 AM WEST   on time         694    4840       383           320     201
## 4 AM WEST   delayed         117     415        65           129      61

using the pivot_longe function to reshape the data frame to a long format so that the columns from 3 to 7 are all organized under one column, as is their respective values

tidy <- pivot_longer(tidy, cols = 3:7, names_to = "city", values_to = "Frequency")

replacing all the . in between the cities

tidy$city <- str_replace_all(tidy$city, "\\.", " ")
tidy
## # A tibble: 20 × 4
##    Airline Tardiness city          Frequency
##    <chr>   <chr>     <chr>             <int>
##  1 ALASKA  on time   Los Angeles         497
##  2 ALASKA  on time   Phoenix             221
##  3 ALASKA  on time   San Diego           212
##  4 ALASKA  on time   San Francisco       503
##  5 ALASKA  on time   Seattle            1841
##  6 ALASKA  delayed   Los Angeles          62
##  7 ALASKA  delayed   Phoenix              12
##  8 ALASKA  delayed   San Diego            20
##  9 ALASKA  delayed   San Francisco       102
## 10 ALASKA  delayed   Seattle             305
## 11 AM WEST on time   Los Angeles         694
## 12 AM WEST on time   Phoenix            4840
## 13 AM WEST on time   San Diego           383
## 14 AM WEST on time   San Francisco       320
## 15 AM WEST on time   Seattle             201
## 16 AM WEST delayed   Los Angeles         117
## 17 AM WEST delayed   Phoenix             415
## 18 AM WEST delayed   San Diego            65
## 19 AM WEST delayed   San Francisco       129
## 20 AM WEST delayed   Seattle              61

By calclating the total amount of flights and the total amount of delayed, and ontime flight, I can calculate the on time rate and the delayed rate for each airline

airline_percentage <-
  tidy %>% 
    group_by(Airline) %>%
      summarise(
        num_flights = sum(Frequency),
        num_delays = sum(ifelse(Tardiness=='delayed', Frequency, 0))
      )

airline_percentage <- 
  airline_percentage %>%
    mutate(
        num_on_time = num_flights - num_delays,
        delay_rate = num_delays / num_flights,
        on_time_rate = (num_flights - num_delays) / num_flights
    ) %>%
      arrange(delay_rate)
airline_percentage
## # A tibble: 2 × 6
##   Airline num_flights num_delays num_on_time delay_rate on_time_rate
##   <chr>         <int>      <dbl>       <dbl>      <dbl>        <dbl>
## 1 AM WEST        7225        787        6438      0.109        0.891
## 2 ALASKA         3775        501        3274      0.133        0.867

Conclusion: AM West on time rate : 89.1%, delayed rate: 10.8%.

Alaska Airlines on time rate : 86.7%, delayed rate: 13.3%.

Alaska Airlines on time rate: 86.7 percent, delayed rate 13.2% Based off of this data, I can conclude, that on average, AM WEST Airlines is on time 3% more than Alaska airlines for a total of 89%.As well as the fact that AM WEST Airlines has a lower delay rate than Alaska Airlines which is also lower by 2 percent for a total of 11 percent if you round up.