# Read the arrivals data; the first line of the CSV is treated as the header.
# NOTE(review): this is an absolute, machine-specific path -- the script will
# only run where this exact file location exists.
delay <- read.csv(
  file = "C:/Users/hangr/Documents/Acquisition and data management/Arrivals.csv",
  header = TRUE
)
# Echo the raw data frame.
delay
The dataset has 20 records and 7 variables. It contains the airlines and the status of their flights to 5 cities. Our goal is to compare the arrival delays for the two airlines.
#Load the libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Give the first two columns meaningful names.
names(delay)[1:2] <- c("Airlines", "Status")
delay
# Drop every row whose Phoenix value is NA.
delay <- dplyr::filter(delay, !is.na(Phoenix))
delay
# Fill in missing Airlines values.
# Blank cells are first marked as NA and then filled downward from the nearest
# non-missing value above, so the result no longer depends on there being
# exactly two blanks in a fixed order, and the filled labels always match the
# spelling already present in the data (keeping the later group_by(Airlines)
# from splitting one airline into two groups).
# NOTE(review): assumes a blank Airlines cell belongs to the airline named in
# the closest row above it -- confirm against the layout of Arrivals.csv.
delay$Airlines <- as.character(delay$Airlines)
delay$Airlines[!nzchar(trimws(delay$Airlines))] <- NA_character_
delay <- tidyr::fill(delay, Airlines, .direction = "down")
delay
# Wide -> long: stack columns 3 to 7 into City / Count pairs.
delay_trans <- delay %>%
  tidyr::gather(key = "City", value = "Count", 3:7)
delay_trans

# Long -> wide: give each Status value its own column holding the counts.
delay_trans2 <- delay_trans %>%
  tidyr::spread(key = Status, value = Count)
delay_trans2
#Load the ggplot2 library
library(ggplot2)
# Scatter plot of the "Delay" rows only: Count per City, coloured by airline.
delay_trans %>%
  filter(Status == "Delay") %>%
  ggplot(aes(x = City, y = Count, color = Airlines)) +
  geom_point(size = 3) +
  ggtitle("Delayed Flight") +
  ylab("Number of Delayed Flights")
## Compare the arrival delays of the two airlines
# One row per airline with summary statistics of the Delay column
# (maximum, minimum, mean, standard deviation and interquartile range).
sum_delay <- delay_trans2 %>%
  dplyr::group_by(Airlines) %>%
  dplyr::summarise(
    max = max(Delay),
    min = min(Delay),
    avg = mean(Delay),
    SD  = sd(Delay),
    IQR = IQR(Delay)
  )
sum_delay
Based on this summary of delayed-flight counts, AM West is more likely to be delayed than Alaska.