Load the required packages:
library(dplyr)
library(tidyr)
library(knitr)
1. Read the given flights CSV file into an R object
# Read the source file from the Github repository
flights_data <- read.csv("https://raw.githubusercontent.com/isrini/SI_IS607/master/flights_data.csv")
head(flights_data)
## X X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 delayed 117 415 65 129 61
2. Do the data wrangling with tidyr and dplyr
#Add the airline name in the blank rows by selecting the row with airline name and updating the
# blank row with the aairline name.
data_row <- seq(1, 3, by = 2)
blank_row <- seq(2, 4, by = 2)
flights_data[blank_row,1] <- flights_data[data_row,1]
head(flights_data)
## X X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 65 129 61
# Using gather to make city into a variable
flights_data <- gather(flights_data, "City", "n", 3:7)
head(flights_data)
## X X.1 City n
## 1 ALASKA on time Los.Angeles 497
## 2 ALASKA delayed Los.Angeles 62
## 3 AM WEST on time Los.Angeles 694
## 4 AM WEST delayed Los.Angeles 117
## 5 ALASKA on time Phoenix 221
## 6 ALASKA delayed Phoenix 12
## use spread to make 'on-time' and 'delayed' rows into two new distinct columns
flights_data <- spread(flights_data, X.1, n)
head(flights_data)
## X City delayed on time
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 AM WEST Los.Angeles 117 694
# And finally doing some column name adjustments.
# Name the first column header as 'Airline'
colnames(flights_data)[1] <- "Airline"
# Replace the space with the hyphen for the column 'on time'
colnames(flights_data)[4] <- "on_time"
# Replace the . in the city names with a space
flights_data$City <- gsub("\\."," ",flights_data$City)