title: “Assignment5” author: “Anna Moy” date: “2024-02-25” output: html_document
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
airline <- read_csv("https://raw.githubusercontent.com/AnnaMoy/Data-607/main/airline.csv")
airline
## # A tibble: 5 × 7
## ...1 ...2 `Los Angeles` Phoenix `San Diego` `San Francisco` Seattle
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALASKA on time 497 221 212 503 1841
## 2 <NA> delayed 62 12 20 102 305
## 3 <NA> <NA> NA NA NA NA NA
## 4 AM WEST on time 694 4840 383 320 201
## 5 <NA> delayed 117 415 65 129 61
airline <- airline[-3,]
airline %>%
fill('...1', .direction = "down")
## # A tibble: 4 × 7
## ...1 ...2 `Los Angeles` Phoenix `San Diego` `San Francisco` Seattle
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 65 129 61
airline <- airline %>%
fill('...1', .direction = "down") %>%
pivot_longer(-c('...1', '...2'), names_to = "location") %>%
pivot_wider(names_from ='...2', values_from = value) %>%
rename("airline"= '...1', "on_time" = 'on time')
airline
## # A tibble: 10 × 4
## airline location on_time delayed
## <chr> <chr> <dbl> <dbl>
## 1 ALASKA Los Angeles 497 62
## 2 ALASKA Phoenix 221 12
## 3 ALASKA San Diego 212 20
## 4 ALASKA San Francisco 503 102
## 5 ALASKA Seattle 1841 305
## 6 AM WEST Los Angeles 694 117
## 7 AM WEST Phoenix 4840 415
## 8 AM WEST San Diego 383 65
## 9 AM WEST San Francisco 320 129
## 10 AM WEST Seattle 201 61
airline %>%
group_by(airline) %>%
select (airline, on_time, delayed) %>%
summarize(meandelay = mean(delayed), mediandelay = median(delayed), meanontime = mean(on_time), medianontime = median(on_time))
## # A tibble: 2 × 5
## airline meandelay mediandelay meanontime medianontime
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ALASKA 100. 62 655. 497
## 2 AM WEST 157. 117 1288. 383
flight <- airline %>%
group_by(airline) %>%
mutate(total_delay = sum(delayed), total_ontime = sum(on_time),all_flight = total_delay + total_ontime, percent_rate = delayed/all_flight, percent_delay = (delayed/all_flight)) %>%
arrange(airline, desc(percent_delay))
flight
## # A tibble: 10 × 9
## # Groups: airline [2]
## airline location on_time delayed total_delay total_ontime all_flight
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALASKA Seattle 1841 305 501 3274 3775
## 2 ALASKA San Francisco 503 102 501 3274 3775
## 3 ALASKA Los Angeles 497 62 501 3274 3775
## 4 ALASKA San Diego 212 20 501 3274 3775
## 5 ALASKA Phoenix 221 12 501 3274 3775
## 6 AM WEST Phoenix 4840 415 787 6438 7225
## 7 AM WEST San Francisco 320 129 787 6438 7225
## 8 AM WEST Los Angeles 694 117 787 6438 7225
## 9 AM WEST San Diego 383 65 787 6438 7225
## 10 AM WEST Seattle 201 61 787 6438 7225
## # ℹ 2 more variables: percent_rate <dbl>, percent_delay <dbl>
total <- airline %>%
group_by(airline) %>%
summarize(sum = sum(delayed))
total
## # A tibble: 2 × 2
## airline sum
## <chr> <dbl>
## 1 ALASKA 501
## 2 AM WEST 787
airline %>%
filter(airline == "ALASKA") %>%
summary(airline)
## airline location on_time delayed
## Length:5 Length:5 Min. : 212.0 Min. : 12.0
## Class :character Class :character 1st Qu.: 221.0 1st Qu.: 20.0
## Mode :character Mode :character Median : 497.0 Median : 62.0
## Mean : 654.8 Mean :100.2
## 3rd Qu.: 503.0 3rd Qu.:102.0
## Max. :1841.0 Max. :305.0
airline %>%
filter(airline == "AM WEST") %>%
summary(airline)
## airline location on_time delayed
## Length:5 Length:5 Min. : 201 Min. : 61.0
## Class :character Class :character 1st Qu.: 320 1st Qu.: 65.0
## Mode :character Mode :character Median : 383 Median :117.0
## Mean :1288 Mean :157.4
## 3rd Qu.: 694 3rd Qu.:129.0
## Max. :4840 Max. :415.0
ggplot(flight, aes(x = location, y = percent_delay, fill = airline)) +
geom_bar(stat = "identity", position = "dodge")
ggplot(total, aes(x = airline, sum, fill = airline)) +
geom_bar(stat = "identity", position = "dodge")
ggplot(flight, aes(x = airline, percent_rate, fill = airline)) +
geom_bar(stat = "identity", position = "dodge")
AM West has more delays than Alaska as the average and median are higher than Alaska. But if we look at the overall flights compared to the percentage of delay we will notice that Alaska has more delays then AM West. There was a bias if we were jsut looking at over all delayed in total without looking overall picture.