Sample flight arrival status data is provided in a raw format and needs to be cleaned and transformed before it can be analyzed for insights. The following R libraries are used for data tidying and transformation:
library(RCurl)
library(dplyr)
library(tidyr)
# Build the cleaned arrival table one step at a time.
# Empty cells in the CSV are read in as NA (na.strings = "").
arrival_status <- read.csv(
  "https://raw.githubusercontent.com/thasleem1/DATA607/master/arrival_data.csv",
  na.strings = ""
)
# Give the columns that were read in as X and X.1 descriptive names.
arrival_status <- rename(arrival_status, Airline = X, ArrivalStatus = X.1)
# Carry each airline name down into the following rows where it is missing.
arrival_status <- fill(arrival_status, Airline, .direction = "down")
# Drop every row that still contains an NA.
arrival_status <- na.exclude(arrival_status)
# Print the cleaned table.
arrival_status

# Reshape wide -> long: columns 3 to 7 are collapsed into a City /
# ArrivalCount pair of columns.
arrival_status_gather <- arrival_status %>%
  gather("City", "ArrivalCount", 3:7)

# Reshape long -> wide on ArrivalStatus so that each status value becomes its
# own column of counts. The code below relies on the resulting columns being
# named `delayed` and `on time`.
arrival_status_spread <- spread(
  arrival_status_gather,
  ArrivalStatus,
  ArrivalCount
)

# Share of delayed arrivals, as a percentage of all arrivals in the row.
arrival_status_mutate_delay <- mutate(
  arrival_status_spread,
  delayratio = delayed / (`on time` + delayed) * 100
)

# Share of on-time arrivals, as a percentage of all arrivals in the row.
arrival_status_mutate_arrival <- mutate(
  arrival_status_spread,
  ontimeratio = `on time` / (`on time` + delayed) * 100
)

# Combine both ratios into one table, matching rows on the shared columns.
filght_status <- inner_join(
  arrival_status_mutate_delay,
  arrival_status_mutate_arrival,
  by = c("Airline", "City", "delayed", "on time")
)

# Print the final table.
filght_status

# Graphical representation of flight delay and on-time arrival ratio
library(ggplot2)
# Dodged bar chart of the delay ratio per city, one bar per airline, with the
# value (rounded to two decimals) printed in white on each bar.
ggplot(data = filght_status, aes(x = City, y = delayratio, fill = Airline)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(
    aes(label = format(round(delayratio, 2), nsmall = 2)),
    vjust = 1.6,
    color = "white",
    # Dodge the labels by 0.9 so they sit over their own bars.
    position = position_dodge(0.9),
    size = 3.5
  ) +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Delay Ratio") +
  ylab("delay") +
  # Center the plot title.
  theme(plot.title = element_text(hjust = 0.5))

# Same chart for the on-time ratio, using a different colour palette.
ggplot(data = filght_status, aes(x = City, y = ontimeratio, fill = Airline)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(
    aes(label = format(round(ontimeratio, 2), nsmall = 2)),
    vjust = 1.6,
    color = "white",
    position = position_dodge(0.9),
    size = 3.5
  ) +
  scale_fill_brewer(palette = "Dark2") +
  ggtitle("On Time Ratio") +
  ylab("on time") +
  theme(plot.title = element_text(hjust = 0.5))

# Export file of final data
# Write the final table to flight_arrival_data.csv in the directory set below.
# Please change the directory according to your file system.
# Note: setwd() changes the working directory for the rest of the session.
setwd("C:/Users/aisha/Dropbox/CUNY/Semester1/DATA607_Data_Acquisition_and_Management/Week5")
write.csv(filght_status, "flight_arrival_data.csv")

# Based on the data analysis, some observations are given below, considering
# airlines and cities.
Cities are ordered below by on-time arrival performance (1 = best, 5 = worst):
1. Phoenix
2. Los Angeles
3. San Diego
4. Seattle
5. San Francisco