I’ll first start by creating a data frame to analogous to table presented and write it to an csv file.
library(knitr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data <- data.frame(c(NA, 'ALASKA', NA, NA, 'AM WEST', NA),
c(NA, 'on time', 'delayed', NA, 'on time', 'delayed'),
c('Los Angeles', 497, 62, NA, 694, 117),
c('Phoenix', 221, 12, NA, 4840, 415),
c('San Diego', 212, 20, NA, 383, 65),
c('San Francisco', 503, 102, NA, 320, 129),
c('Seattle', 1841, 305, NA, 201, 61));
#kable(data)
write.table(data,"airlines.csv",col.names = FALSE, row.names=FALSE, sep = ",")
#write.table(data,"airlines.csv", sep = ",")
Transforming the data
library(knitr)
library(tidyr)
library(dplyr)
dd <- read.csv("airlines.csv");
dd <- data.frame(dd); # convert into a data frame
dd <- dd[rowSums(is.na(dd)) != ncol(dd), ]; # remove empty rows
dd <- rename(dd, c("airline" = "NA.",
"status" = "NA..1",
"Los Angeles" = "Los.Angeles",
"San Diego" = "San.Diego",
"San Francisco" = "San.Francisco"));
dd <- gather(dd, key="destination", value="amount", 3:7);
dd <- dd %>% fill(airline, .direction = "down") # fill the blank rows of the airline column with the previous value
kable(dd) #showing transformed table
| airline | status | destination | amount |
|---|---|---|---|
| ALASKA | on time | Los Angeles | 497 |
| ALASKA | delayed | Los Angeles | 62 |
| AM WEST | on time | Los Angeles | 694 |
| AM WEST | delayed | Los Angeles | 117 |
| ALASKA | on time | Phoenix | 221 |
| ALASKA | delayed | Phoenix | 12 |
| AM WEST | on time | Phoenix | 4840 |
| AM WEST | delayed | Phoenix | 415 |
| ALASKA | on time | San Diego | 212 |
| ALASKA | delayed | San Diego | 20 |
| AM WEST | on time | San Diego | 383 |
| AM WEST | delayed | San Diego | 65 |
| ALASKA | on time | San Francisco | 503 |
| ALASKA | delayed | San Francisco | 102 |
| AM WEST | on time | San Francisco | 320 |
| AM WEST | delayed | San Francisco | 129 |
| ALASKA | on time | Seattle | 1841 |
| ALASKA | delayed | Seattle | 305 |
| AM WEST | on time | Seattle | 201 |
| AM WEST | delayed | Seattle | 61 |
Perform analysis to compare the arrival delays for the two airlines
library(knitr)
library(tidyr)
library(dplyr)
#
# airline_delay <- dd %>% group_by(airline) %>%
# summarise(delay_count = count(dd$amount))
dd2 <- filter(dd,status == "delayed" & airline == "ALASKA");
kable(dd2)
| airline | status | destination | amount |
|---|---|---|---|
| ALASKA | delayed | Los Angeles | 62 |
| ALASKA | delayed | Phoenix | 12 |
| ALASKA | delayed | San Diego | 20 |
| ALASKA | delayed | San Francisco | 102 |
| ALASKA | delayed | Seattle | 305 |
print(paste("Alaska airlines has amount of delays of: ",sum(dd2$amount)))
[1] “Alaska airlines has amount of delays of: 501”
dd2 <- filter(dd,status == "delayed" & airline == "AM WEST");
kable(dd2)
| airline | status | destination | amount |
|---|---|---|---|
| AM WEST | delayed | Los Angeles | 117 |
| AM WEST | delayed | Phoenix | 415 |
| AM WEST | delayed | San Diego | 65 |
| AM WEST | delayed | San Francisco | 129 |
| AM WEST | delayed | Seattle | 61 |
print(paste("AM WEST airlines has amount of delays of: ",sum(dd2$amount)))
[1] “AM WEST airlines has amount of delays of: 787”