##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# import csv into R
flights <- read.csv("https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Assignments/Week%206/flights.csv")
head(flights)
## X X.1 Los.Angeles Phoenix San.Diego San.Francisco Seatlle
## 1 ALASKA on time 497 221 212 503 1841
## 2 delayed 62 12 20 102 305
## 3 NA NA NA NA NA
## 4 AMWEST on time 694 4840 383 320 201
## 5 delayed 117 415 65 129 61
I noticed that the 3rd row of the dataframe was empty. I also noticed that the 2nd and 5th observation were missing the airline data. So I then proceeded to remove the 3rd row and fill in the missing data.
#remove empty row
flights <- flights[c("1","2","4","5"),]
# Giving columns names
colnames(flights)[1] <- "Airline"
colnames(flights)[2] <- "Status"
# filling in missing data
flights[2,1] <-"ALASKA"
flights[4,1] <-"AMWEST"
head(flights)
## Airline Status Los.Angeles Phoenix San.Diego San.Francisco Seatlle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 4 AMWEST on time 694 4840 383 320 201
## 5 AMWEST delayed 117 415 65 129 61
# transforming the data structure from wide to long
flights <- flights %>% gather("City","n",3:7) %>% spread(Status,n)
head(flights)
## Airline City delayed on time
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seatlle 305 1841
## 6 AMWEST Los.Angeles 117 694
i <- flights %>% group_by(Airline) %>% summarise(dmean = mean(as.integer(delayed)))
# barplot of each airline's mean delays.
ggplot(data = i, aes(x=Airline, y= dmean, fill=Airline)) + geom_bar(stat = "identity")
i <- flights %>% group_by(Airline,City) %>% summarise(dmean = mean(as.integer(delayed)))
# barplot of delays grouped by airline and city.
ggplot(data = i, aes(x=Airline, y= dmean,fill = City)) + geom_bar(stat = "identity", position=position_dodge())