#Read is csv file
domflights <- read.csv("domestic_flights_jan_2016.csv", header = TRUE,
stringsAsFactors = FALSE)
kable(str(domflights))
## 'data.frame': 90850 obs. of 21 variables:
## $ FlightDate : chr "1/6/2016" "1/7/2016" "1/8/2016" "1/9/2016" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N4YBAA" "N434AA" "N541AA" "N489AA" ...
## $ FlightNum : int 43 43 43 43 43 43 43 43 43 43 ...
## $ Origin : chr "DFW" "DFW" "DFW" "DFW" ...
## $ OriginCityName : chr "Dallas/Fort Worth, TX" "Dallas/Fort Worth, TX" "Dallas/Fort Worth, TX" "Dallas/Fort Worth, TX" ...
## $ OriginState : chr "TX" "TX" "TX" "TX" ...
## $ Dest : chr "DTW" "DTW" "DTW" "DTW" ...
## $ DestCityName : chr "Detroit, MI" "Detroit, MI" "Detroit, MI" "Detroit, MI" ...
## $ DestState : chr "MI" "MI" "MI" "MI" ...
## $ CRSDepTime : int 1100 1100 1100 1100 1100 1100 1100 1100 1100 1100 ...
## $ DepTime : int 1057 1056 1055 1102 1240 1107 1059 1055 1058 1056 ...
## $ WheelsOff : int 1112 1110 1116 1115 1300 1118 1113 1107 1110 1110 ...
## $ WheelsOn : int 1424 1416 1431 1424 1617 1426 1429 1419 1420 1423 ...
## $ CRSArrTime : int 1438 1438 1438 1438 1438 1438 1438 1438 1438 1438 ...
## $ ArrTime : int 1432 1426 1445 1433 1631 1435 1438 1431 1428 1434 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 158 158 158 158 158 158 158 158 158 158 ...
## $ ActualElapsedTime: int 155 150 170 151 171 148 159 156 150 158 ...
## $ Distance : int 986 986 986 986 986 986 986 986 986 986 ...
domflights %>% filter(!complete.cases(.)) %>% str() %>% kable()
## 'data.frame': 3027 obs. of 21 variables:
## $ FlightDate : chr "1/16/2016" "1/23/2016" "1/24/2016" "1/25/2016" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N3CXAA" "" "N3DGAA" "N3MNAA" ...
## $ FlightNum : int 44 44 44 44 45 45 45 45 50 52 ...
## $ Origin : chr "SEA" "SEA" "SEA" "SEA" ...
## $ OriginCityName : chr "Seattle, WA" "Seattle, WA" "Seattle, WA" "Seattle, WA" ...
## $ OriginState : chr "WA" "WA" "WA" "WA" ...
## $ Dest : chr "JFK" "JFK" "JFK" "JFK" ...
## $ DestCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ DestState : chr "NY" "NY" "NY" "NY" ...
## $ CRSDepTime : int 640 640 645 645 1830 1830 1830 1820 1410 1315 ...
## $ DepTime : int NA NA NA NA NA NA NA NA NA NA ...
## $ WheelsOff : int NA NA NA NA NA NA NA NA NA NA ...
## $ WheelsOn : int NA NA NA NA NA NA NA NA NA NA ...
## $ CRSArrTime : int 1501 1501 1506 1506 2152 2152 2152 2142 1604 2110 ...
## $ ArrTime : int NA NA NA NA NA NA NA NA NA NA ...
## $ Cancelled : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 321 321 321 321 382 382 382 382 114 295 ...
## $ ActualElapsedTime: int NA NA NA NA NA NA NA NA NA NA ...
## $ Distance : int 2422 2422 2422 2422 2422 2422 2422 2422 550 2311 ...
domflights %>% filter(Cancelled == 0, Diverted == 0) %>% filter(!complete.cases(.)) %>% str() %>% kable()
## 'data.frame': 1 obs. of 21 variables:
## $ FlightDate : chr "1/7/2016"
## $ Carrier : chr "B6"
## $ TailNum : chr "N935JB"
## $ FlightNum : int 524
## $ Origin : chr "LAX"
## $ OriginCityName : chr "Los Angeles, CA"
## $ OriginState : chr "CA"
## $ Dest : chr "JFK"
## $ DestCityName : chr "New York, NY"
## $ DestState : chr "NY"
## $ CRSDepTime : int 1650
## $ DepTime : int 1649
## $ WheelsOff : int 1659
## $ WheelsOn : int 59
## $ CRSArrTime : int 58
## $ ArrTime : int 103
## $ Cancelled : int 0
## $ Diverted : int 0
## $ CRSElapsedTime : int NA
## $ ActualElapsedTime: int NA
## $ Distance : int NA
domflights2 <- domflights %>% filter(complete.cases(.))
domflights2 %>% summarise(total_flights = n()) %>% kable()
total_flights |
---|
87823 |
#Convert FlightDate to a date format
domflights2$FlightDate <- as.Date(domflights2$FlightDate, format = "%m/%d/%Y")
#Convert CRSDepTime to datetime format
domflights2 <- domflights2 %>%
mutate(new_CRSDepTime = paste(FlightDate, sprintf("%04d", CRSDepTime)))
domflights2$new_CRSDepTime <- as.POSIXct(domflights2$new_CRSDepTime, format="%Y-%m-%d %H%M")
#Convert CRSArrTime to datetime format
domflights2 <- domflights2 %>%
mutate(new_CRSArrTime = paste(FlightDate, sprintf("%04d", CRSArrTime)))
domflights2$new_CRSArrTime <- as.POSIXct(domflights2$new_CRSArrTime, format="%Y-%m-%d %H%M")
#Convert DepTime, WheelsOff, WheelsOn, and ArrTime to datetime format
domflights2 <- domflights2 %>%
mutate(new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)),
new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn)),
new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)))
domflights2$new_DepTime <- as.POSIXct(domflights2$new_DepTime, format="%Y-%m-%d %H%M")
domflights2$new_WheelsOff <- as.POSIXct(domflights2$new_WheelsOff, format="%Y-%m-%d %H%M")
domflights2$new_WheelsOn <- as.POSIXct(domflights2$new_WheelsOn, format="%Y-%m-%d %H%M")
domflights2$new_ArrTime <- as.POSIXct(domflights2$new_ArrTime, format="%Y-%m-%d %H%M")
DepDelay = new_DepTime - new_CRSDepTime
DepDelayMinutes = ifelse(DepDelay < 0, 0, DepDelay)
DepDel30 = ifelse(DepDelay >= 30, 1, 0)
TaxiOut = new_WheelsOff - new_DepTime
TaxiIn = new_ArrTime - new_WheelsOn
ArrDelay = new_ArrTime - new_ArrDepTime
ArrDelayMinutes = ifelse(ArrDelay < 0, 0, ArrDelay)
ArrDel30 = ifelse(ArrDelay >= 30, 1, 0)
FlightTimeBuffer = CRSElapsedTime - ActualElapsedTime
AirTime = ActualElapsedTime - TaxiOut - TaxiIn
*AirSpeed = Distance / (Airtime / 60)
#Add calculations as new columns
domflights2 <- domflights2 %>%
mutate(DepDelay = as.integer(difftime(new_DepTime, new_CRSDepTime, units = "mins")),
DepDelayMinutes = ifelse(DepDelay < 0, 0, DepDelay),
DepDel30 = ifelse(DepDelay >= 30, 1, 0),
TaxiOut = as.integer(difftime(new_WheelsOff, new_DepTime, units = "mins")),
TaxiIn = as.integer(difftime(new_ArrTime, new_WheelsOn, units = "mins")),
ArrDelay = as.integer(difftime(new_ArrTime, new_CRSArrTime, units = "mins")),
ArrDelayMinutes = ifelse(ArrDelay < 0, 0, ArrDelay),
ArrDel30 = ifelse(ArrDelay >= 30, 1, 0),
FlightTimeBuffer = CRSElapsedTime - ActualElapsedTime,
AirTime = ActualElapsedTime - TaxiOut - TaxiIn,
AirSpeed = Distance / (AirTime / 60))
domflights2 %>% group_by(Carrier) %>%
summarise(delay_flights = sum(DepDel30) + sum(ArrDel30), perc_delayed = delay_flights / n() * 100) %>%
ggvis(~Carrier, ~perc_delayed) %>% layer_bars()
meflights <- domflights2 %>% filter(Origin == "PWM"| Dest == "PWM")
meflights %>% group_by(Carrier) %>% summarise(delay_flights = sum(DepDel30) + sum(ArrDel30), perc_delayed = delay_flights / n() * 100) %>% ggvis(~Carrier, ~perc_delayed) %>% layer_bars()