library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(chron)
##
## Attaching package: 'chron'
## The following objects are masked from 'package:lubridate':
##
## days, hours, minutes, seconds, years
ASample <- read.csv("ASample.csv", header = TRUE, stringsAsFactors = FALSE)
kable(head(ASample))
| X | FlightDate | Carrier | TailNum | FlightNum | Origin | OriginCityName | OriginState | Dest | DestCityName | DestState | CRSDepTime | DepTime | WheelsOff | WheelsOn | CRSArrTime | ArrTime | Cancelled | Diverted | CRSElapsedTime | ActualElapsedTime | Distance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 177486 | 1/31/2016 | OO | N905SW | 6258 | ORD | Chicago, IL | IL | OKC | Oklahoma City, OK | OK | 1919 | 1911 | 1926 | 2123 | 2141 | 2129 | 0 | 0 | 142 | 138 | 693 |
| 146417 | 1/25/2016 | OO | N782SK | 5707 | PDX | Portland, OR | OR | DEN | Denver, CO | CO | 1050 | 1050 | 1105 | 1413 | 1421 | 1422 | 0 | 0 | 151 | 152 | 991 |
| 274453 | 1/15/2016 | WN | N460WN | 1076 | PHX | Phoenix, AZ | AZ | DEN | Denver, CO | CO | 620 | 616 | 629 | 747 | 810 | 755 | 0 | 0 | 110 | 99 | 602 |
| 35196 | 1/11/2016 | AA | N5EJAA | 2495 | DFW | Dallas/Fort Worth, TX | TX | LAX | Los Angeles, CA | CA | 2030 | 2029 | 2047 | 2117 | 2157 | 2122 | 0 | 0 | 207 | 173 | 1235 |
| 221565 | 1/1/2016 | UA | N477UA | 371 | ATL | Atlanta, GA | GA | IAH | Houston, TX | TX | 1855 | 1852 | 1906 | 1958 | 2025 | 2009 | 0 | 0 | 150 | 137 | 689 |
| 424377 | 1/22/2016 | EV | N857AS | 4873 | ATL | Atlanta, GA | GA | GNV | Gainesville, FL | FL | 2230 | 11 | 57 | 153 | 2338 | 157 | 0 | 0 | 68 | 106 | 300 |
str(ASample)
## 'data.frame': 9396 obs. of 22 variables:
## $ X : int 177486 146417 274453 35196 221565 424377 377085 67770 222796 374515 ...
## $ FlightDate : chr "1/31/2016" "1/25/2016" "1/15/2016" "1/11/2016" ...
## $ Carrier : chr "OO" "OO" "WN" "AA" ...
## $ TailNum : chr "N905SW" "N782SK" "N460WN" "N5EJAA" ...
## $ FlightNum : int 6258 5707 1076 2495 371 4873 2617 426 964 1228 ...
## $ Origin : chr "ORD" "PDX" "PHX" "DFW" ...
## $ OriginCityName : chr "Chicago, IL" "Portland, OR" "Phoenix, AZ" "Dallas/Fort Worth, TX" ...
## $ OriginState : chr "IL" "OR" "AZ" "TX" ...
## $ Dest : chr "OKC" "DEN" "DEN" "LAX" ...
## $ DestCityName : chr "Oklahoma City, OK" "Denver, CO" "Denver, CO" "Los Angeles, CA" ...
## $ DestState : chr "OK" "CO" "CO" "CA" ...
## $ CRSDepTime : int 1919 1050 620 2030 1855 2230 1659 605 2050 1316 ...
## $ DepTime : int 1911 1050 616 2029 1852 11 1653 627 2040 1314 ...
## $ WheelsOff : int 1926 1105 629 2047 1906 57 1704 639 2049 1327 ...
## $ WheelsOn : int 2123 1413 747 2117 1958 153 1937 830 2156 1514 ...
## $ CRSArrTime : int 2141 1421 810 2157 2025 2338 2012 851 2215 1542 ...
## $ ArrTime : int 2129 1422 755 2122 2009 157 1947 913 2201 1522 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 142 151 110 207 150 68 133 286 85 86 ...
## $ ActualElapsedTime: int 138 152 99 173 137 106 114 286 81 68 ...
## $ Distance : int 693 991 602 1235 689 300 707 1797 447 341 ...
#Delete the unnecessary columns from the cancelled flights data
ASample <- ASample[-c(1)]
#Convert FlightDate to the Date class
ASample$FlightDate <- as.Date(ASample$FlightDate, format = "%m/%d/%Y")
# #Using Posixct, put the time variables, which are presently "int" in a date/time format
ASample <- ASample %>%
mutate(new_CRSDepTime = paste(FlightDate, sprintf("%04d", CRSDepTime)), new_CRSArrTime = paste(FlightDate, sprintf("%04d", CRSArrTime)), new_DepTime = paste(FlightDate, sprintf("%04d",DepTime)), new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)), new_WheelsOff = paste(FlightDate, sprintf("%04d",WheelsOff)), new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn)))
ASample$new_CRSDepTime <- as.POSIXct(ASample$new_CRSDepTime, format="%Y-%m-%d %H%M")
ASample$new_CRSArrTime <- as.POSIXct(ASample$new_CRSArrTime, format="%Y-%m-%d %H%M")
ASample$new_DepTime <- as.POSIXct(ASample$new_DepTime, format="%Y-%m-%d %H%M")
ASample$new_ArrTime <- as.POSIXct(ASample$new_ArrTime, format="%Y-%m-%d %H%M")
ASample$new_WheelsOff <- as.POSIXct(ASample$new_WheelsOff, format="%Y-%m-%d %H%M")
ASample$new_WheelsOn <- as.POSIXct(ASample$new_WheelsOn, format ="%Y-%m-%d %H%M")
kable(head(ASample))
| FlightDate | Carrier | TailNum | FlightNum | Origin | OriginCityName | OriginState | Dest | DestCityName | DestState | CRSDepTime | DepTime | WheelsOff | WheelsOn | CRSArrTime | ArrTime | Cancelled | Diverted | CRSElapsedTime | ActualElapsedTime | Distance | new_CRSDepTime | new_CRSArrTime | new_DepTime | new_ArrTime | new_WheelsOff | new_WheelsOn |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2016-01-31 | OO | N905SW | 6258 | ORD | Chicago, IL | IL | OKC | Oklahoma City, OK | OK | 1919 | 1911 | 1926 | 2123 | 2141 | 2129 | 0 | 0 | 142 | 138 | 693 | 2016-01-31 19:19:00 | 2016-01-31 21:41:00 | 2016-01-31 19:11:00 | 2016-01-31 21:29:00 | 2016-01-31 19:26:00 | 2016-01-31 21:23:00 |
| 2016-01-25 | OO | N782SK | 5707 | PDX | Portland, OR | OR | DEN | Denver, CO | CO | 1050 | 1050 | 1105 | 1413 | 1421 | 1422 | 0 | 0 | 151 | 152 | 991 | 2016-01-25 10:50:00 | 2016-01-25 14:21:00 | 2016-01-25 10:50:00 | 2016-01-25 14:22:00 | 2016-01-25 11:05:00 | 2016-01-25 14:13:00 |
| 2016-01-15 | WN | N460WN | 1076 | PHX | Phoenix, AZ | AZ | DEN | Denver, CO | CO | 620 | 616 | 629 | 747 | 810 | 755 | 0 | 0 | 110 | 99 | 602 | 2016-01-15 06:20:00 | 2016-01-15 08:10:00 | 2016-01-15 06:16:00 | 2016-01-15 07:55:00 | 2016-01-15 06:29:00 | 2016-01-15 07:47:00 |
| 2016-01-11 | AA | N5EJAA | 2495 | DFW | Dallas/Fort Worth, TX | TX | LAX | Los Angeles, CA | CA | 2030 | 2029 | 2047 | 2117 | 2157 | 2122 | 0 | 0 | 207 | 173 | 1235 | 2016-01-11 20:30:00 | 2016-01-11 21:57:00 | 2016-01-11 20:29:00 | 2016-01-11 21:22:00 | 2016-01-11 20:47:00 | 2016-01-11 21:17:00 |
| 2016-01-01 | UA | N477UA | 371 | ATL | Atlanta, GA | GA | IAH | Houston, TX | TX | 1855 | 1852 | 1906 | 1958 | 2025 | 2009 | 0 | 0 | 150 | 137 | 689 | 2016-01-01 18:55:00 | 2016-01-01 20:25:00 | 2016-01-01 18:52:00 | 2016-01-01 20:09:00 | 2016-01-01 19:06:00 | 2016-01-01 19:58:00 |
| 2016-01-22 | EV | N857AS | 4873 | ATL | Atlanta, GA | GA | GNV | Gainesville, FL | FL | 2230 | 11 | 57 | 153 | 2338 | 157 | 0 | 0 | 68 | 106 | 300 | 2016-01-22 22:30:00 | 2016-01-22 23:38:00 | 2016-01-22 00:11:00 | 2016-01-22 01:57:00 | 2016-01-22 00:57:00 | 2016-01-22 01:53:00 |
str(ASample)
## 'data.frame': 9396 obs. of 27 variables:
## $ FlightDate : Date, format: "2016-01-31" "2016-01-25" ...
## $ Carrier : chr "OO" "OO" "WN" "AA" ...
## $ TailNum : chr "N905SW" "N782SK" "N460WN" "N5EJAA" ...
## $ FlightNum : int 6258 5707 1076 2495 371 4873 2617 426 964 1228 ...
## $ Origin : chr "ORD" "PDX" "PHX" "DFW" ...
## $ OriginCityName : chr "Chicago, IL" "Portland, OR" "Phoenix, AZ" "Dallas/Fort Worth, TX" ...
## $ OriginState : chr "IL" "OR" "AZ" "TX" ...
## $ Dest : chr "OKC" "DEN" "DEN" "LAX" ...
## $ DestCityName : chr "Oklahoma City, OK" "Denver, CO" "Denver, CO" "Los Angeles, CA" ...
## $ DestState : chr "OK" "CO" "CO" "CA" ...
## $ CRSDepTime : int 1919 1050 620 2030 1855 2230 1659 605 2050 1316 ...
## $ DepTime : int 1911 1050 616 2029 1852 11 1653 627 2040 1314 ...
## $ WheelsOff : int 1926 1105 629 2047 1906 57 1704 639 2049 1327 ...
## $ WheelsOn : int 2123 1413 747 2117 1958 153 1937 830 2156 1514 ...
## $ CRSArrTime : int 2141 1421 810 2157 2025 2338 2012 851 2215 1542 ...
## $ ArrTime : int 2129 1422 755 2122 2009 157 1947 913 2201 1522 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 142 151 110 207 150 68 133 286 85 86 ...
## $ ActualElapsedTime: int 138 152 99 173 137 106 114 286 81 68 ...
## $ Distance : int 693 991 602 1235 689 300 707 1797 447 341 ...
## $ new_CRSDepTime : POSIXct, format: "2016-01-31 19:19:00" "2016-01-25 10:50:00" ...
## $ new_CRSArrTime : POSIXct, format: "2016-01-31 21:41:00" "2016-01-25 14:21:00" ...
## $ new_DepTime : POSIXct, format: "2016-01-31 19:11:00" "2016-01-25 10:50:00" ...
## $ new_ArrTime : POSIXct, format: "2016-01-31 21:29:00" "2016-01-25 14:22:00" ...
## $ new_WheelsOff : POSIXct, format: "2016-01-31 19:26:00" "2016-01-25 11:05:00" ...
## $ new_WheelsOn : POSIXct, format: "2016-01-31 21:23:00" "2016-01-25 14:13:00" ...
The dataframe summary above confirms that the classes of some of the variables were altered.
Reading in the names is handy because I’ll be changing some of the variable names and need to know the exact original name of the column. The list below provides a tool to use for copying and pasting when needed.
kable(names(ASample))
| FlightDate |
| Carrier |
| TailNum |
| FlightNum |
| Origin |
| OriginCityName |
| OriginState |
| Dest |
| DestCityName |
| DestState |
| CRSDepTime |
| DepTime |
| WheelsOff |
| WheelsOn |
| CRSArrTime |
| ArrTime |
| Cancelled |
| Diverted |
| CRSElapsedTime |
| ActualElapsedTime |
| Distance |
| new_CRSDepTime |
| new_CRSArrTime |
| new_DepTime |
| new_ArrTime |
| new_WheelsOff |
| new_WheelsOn |
271 rows contain “NA” data. Most of them represent cancelled flights. Each case can be investigated. Scrolling through the 271 data points is lenthy,: I’ve therefore chosen to only show the first 6 entries.
kable(head(ASample %>% filter(!complete.cases(.))))
| FlightDate | Carrier | TailNum | FlightNum | Origin | OriginCityName | OriginState | Dest | DestCityName | DestState | CRSDepTime | DepTime | WheelsOff | WheelsOn | CRSArrTime | ArrTime | Cancelled | Diverted | CRSElapsedTime | ActualElapsedTime | Distance | new_CRSDepTime | new_CRSArrTime | new_DepTime | new_ArrTime | new_WheelsOff | new_WheelsOn |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2016-01-05 | WN | N214WN | 2702 | SJC | San Jose, CA | CA | LAX | Los Angeles, CA | CA | 1650 | NA | NA | NA | 1800 | NA | 1 | 0 | 70 | NA | 308 | 2016-01-05 16:50:00 | 2016-01-05 18:00:00 | NA | NA | NA | NA |
| 2016-01-24 | AA | 1910 | CLT | Charlotte, NC | NC | LGA | New York, NY | NY | 955 | NA | NA | NA | 1153 | NA | 1 | 0 | 118 | NA | 544 | 2016-01-24 09:55:00 | 2016-01-24 11:53:00 | NA | NA | NA | NA | |
| 2016-01-23 | DL | N329NB | 1720 | EWR | Newark, NJ | NJ | DTW | Detroit, MI | MI | 1232 | NA | NA | NA | 1428 | NA | 1 | 0 | 116 | NA | 488 | 2016-01-23 12:32:00 | 2016-01-23 14:28:00 | NA | NA | NA | NA |
| 2016-01-24 | DL | N3752 | 2360 | SLC | Salt Lake City, UT | UT | DCA | Washington, DC | VA | 1703 | NA | NA | NA | 2259 | NA | 1 | 0 | 236 | NA | 1851 | 2016-01-24 17:03:00 | 2016-01-24 22:59:00 | NA | NA | NA | NA |
| 2016-01-22 | AA | 772 | SJU | San Juan, PR | PR | PHL | Philadelphia, PA | PA | 1600 | NA | NA | NA | 1900 | NA | 1 | 0 | 240 | NA | 1576 | 2016-01-22 16:00:00 | 2016-01-22 19:00:00 | NA | NA | NA | NA | |
| 2016-01-23 | WN | N7719A | 1943 | BOS | Boston, MA | MA | BWI | Baltimore, MD | MD | 1415 | NA | NA | NA | 1550 | NA | 1 | 0 | 95 | NA | 369 | 2016-01-23 14:15:00 | 2016-01-23 15:50:00 | NA | NA | NA | NA |
cancelled <- ASample %>% filter(Cancelled == 1)
254 of the 9,396 sampled flights were cancelled and there are 271 incomplete cases.
library(ggvis)
cancelled %>% ggvis(~Carrier, ~Cancelled, fill := "#CCB359", stroke := NA)
## Guessing layer_bars()
In order do more analysis on the data, we need to do calculations. I know these are the same as those from the class lecture. I feel that typing them in is good practice and helps me more clearly understand the R coding process.
#Actual Departure Delays
ASample <- ASample %>% filter(Cancelled ==0) %>% mutate(DepDelay = as.integer(difftime(new_DepTime, new_CRSDepTime, units = "mins")), DepDelayMinutes = ifelse(DepDelay < 0, 0, DepDelay),
DepDel15 = ifelse(DepDelay >= 15, 1, 0),TaxiOut = as.integer(difftime(new_WheelsOff, new_DepTime, units = "mins")),
#Taxi In
TaxiIn = as.integer(difftime(new_ArrTime, new_WheelsOn, units = "mins")),
#Actual Arrival Delays
ArrDelay = as.integer(difftime(new_ArrTime, new_CRSArrTime, units = "mins")),ArrDelayMinutes = ifelse(ArrDelay < 0, 0, ArrDelay), ArrDel15 = ifelse(ArrDelay >= 15, 1, 0),
#Flight Time Buffer
FlightTimeBuffer = CRSElapsedTime - ActualElapsedTime)
#Air Time
ASample <- ASample %>% filter(Cancelled == 0) %>%
mutate(AirTime = ActualElapsedTime - TaxiOut - TaxiIn)
#Air Speed
ASample <- ASample %>% filter(Cancelled == 0) %>%
mutate(AirSpeed = Distance / (AirTime / 60))
kable(head(ASample), digits = 0)
| FlightDate | Carrier | TailNum | FlightNum | Origin | OriginCityName | OriginState | Dest | DestCityName | DestState | CRSDepTime | DepTime | WheelsOff | WheelsOn | CRSArrTime | ArrTime | Cancelled | Diverted | CRSElapsedTime | ActualElapsedTime | Distance | new_CRSDepTime | new_CRSArrTime | new_DepTime | new_ArrTime | new_WheelsOff | new_WheelsOn | DepDelay | DepDelayMinutes | DepDel15 | TaxiOut | TaxiIn | ArrDelay | ArrDelayMinutes | ArrDel15 | FlightTimeBuffer | AirTime | AirSpeed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2016-01-31 | OO | N905SW | 6258 | ORD | Chicago, IL | IL | OKC | Oklahoma City, OK | OK | 1919 | 1911 | 1926 | 2123 | 2141 | 2129 | 0 | 0 | 142 | 138 | 693 | 2016-01-31 19:19:00 | 2016-01-31 21:41:00 | 2016-01-31 19:11:00 | 2016-01-31 21:29:00 | 2016-01-31 19:26:00 | 2016-01-31 21:23:00 | -8 | 0 | 0 | 15 | 6 | -12 | 0 | 0 | 4 | 117 | 355 |
| 2016-01-25 | OO | N782SK | 5707 | PDX | Portland, OR | OR | DEN | Denver, CO | CO | 1050 | 1050 | 1105 | 1413 | 1421 | 1422 | 0 | 0 | 151 | 152 | 991 | 2016-01-25 10:50:00 | 2016-01-25 14:21:00 | 2016-01-25 10:50:00 | 2016-01-25 14:22:00 | 2016-01-25 11:05:00 | 2016-01-25 14:13:00 | 0 | 0 | 0 | 15 | 9 | 1 | 1 | 0 | -1 | 128 | 465 |
| 2016-01-15 | WN | N460WN | 1076 | PHX | Phoenix, AZ | AZ | DEN | Denver, CO | CO | 620 | 616 | 629 | 747 | 810 | 755 | 0 | 0 | 110 | 99 | 602 | 2016-01-15 06:20:00 | 2016-01-15 08:10:00 | 2016-01-15 06:16:00 | 2016-01-15 07:55:00 | 2016-01-15 06:29:00 | 2016-01-15 07:47:00 | -4 | 0 | 0 | 13 | 8 | -15 | 0 | 0 | 11 | 78 | 463 |
| 2016-01-11 | AA | N5EJAA | 2495 | DFW | Dallas/Fort Worth, TX | TX | LAX | Los Angeles, CA | CA | 2030 | 2029 | 2047 | 2117 | 2157 | 2122 | 0 | 0 | 207 | 173 | 1235 | 2016-01-11 20:30:00 | 2016-01-11 21:57:00 | 2016-01-11 20:29:00 | 2016-01-11 21:22:00 | 2016-01-11 20:47:00 | 2016-01-11 21:17:00 | -1 | 0 | 0 | 18 | 5 | -35 | 0 | 0 | 34 | 150 | 494 |
| 2016-01-01 | UA | N477UA | 371 | ATL | Atlanta, GA | GA | IAH | Houston, TX | TX | 1855 | 1852 | 1906 | 1958 | 2025 | 2009 | 0 | 0 | 150 | 137 | 689 | 2016-01-01 18:55:00 | 2016-01-01 20:25:00 | 2016-01-01 18:52:00 | 2016-01-01 20:09:00 | 2016-01-01 19:06:00 | 2016-01-01 19:58:00 | -3 | 0 | 0 | 14 | 11 | -16 | 0 | 0 | 13 | 112 | 369 |
| 2016-01-22 | EV | N857AS | 4873 | ATL | Atlanta, GA | GA | GNV | Gainesville, FL | FL | 2230 | 11 | 57 | 153 | 2338 | 157 | 0 | 0 | 68 | 106 | 300 | 2016-01-22 22:30:00 | 2016-01-22 23:38:00 | 2016-01-22 00:11:00 | 2016-01-22 01:57:00 | 2016-01-22 00:57:00 | 2016-01-22 01:53:00 | -1339 | 0 | 0 | 46 | 4 | -1301 | 0 | 0 | -38 | 56 | 321 |
Here, the graph and table illustrate which airlines have the most departure delays. These delays can be any length of time.
DD <- ASample %>% group_by(Carrier) %>%
mutate(delayed = ifelse(DepDelay > 0, 1, 0)) %>%
summarize(perc_delay = sum(delayed) / n())
DD %>% ggvis(~Carrier, ~perc_delay) %>% layer_bars(fill := "#D1062C", stroke := NA)
Here, we can see long delays, in excess of 2 hours. This occurs rarely.
Minimum2HrDepDelay <- ASample %>% filter(DepDelay >= 120, Cancelled == 0) %>% select(Carrier,TailNum, FlightNum, OriginCityName, DestCityName, new_CRSDepTime, new_CRSArrTime, DepDelay) %>% arrange(desc(DepDelay))
kable(head(Minimum2HrDepDelay))
| Carrier | TailNum | FlightNum | OriginCityName | DestCityName | new_CRSDepTime | new_CRSArrTime | DepDelay |
|---|---|---|---|---|---|---|---|
| OO | N629BR | 7423 | Brainerd, MN | Minneapolis, MN | 2016-01-24 07:20:00 | 2016-01-24 08:14:00 | 652 |
| B6 | N506JB | 573 | Newark, NJ | Tampa, FL | 2016-01-24 12:58:00 | 2016-01-24 15:58:00 | 571 |
| EV | N176PQ | 5051 | Minneapolis, MN | Rochester, MN | 2016-01-12 13:25:00 | 2016-01-12 14:14:00 | 427 |
| UA | N66841 | 1880 | San Juan, PR | Newark, NJ | 2016-01-24 13:10:00 | 2016-01-24 16:30:00 | 389 |
| OO | N457SW | 4832 | Detroit, MI | Dayton, OH | 2016-01-25 15:50:00 | 2016-01-25 16:53:00 | 367 |
| F9 | N918FR | 1184 | Milwaukee, WI | Orlando, FL | 2016-01-11 06:45:00 | 2016-01-11 10:50:00 | 354 |
Minimum2HrDepDelay %>% ggvis(~Carrier, fill := "#D1062C", stroke := NA)
## Guessing layer_bars()
Arrival delays, like departure delays, occur frequently, but long delays are actually quite rare.
A1 <- ASample %>% group_by(Carrier) %>%
mutate(delayed =ifelse(DepDelay > 0, 1, 0)) %>%
summarize(perc_delay = sum(delayed) / n())
A1 %>% ggvis(~Carrier, ~perc_delay) %>% layer_bars(fill := "#F5406D", stroke := NA)
Below is a table with the longest 6 arrival delays in descending order from the sample. There were 156 delays altogether out of 9,360 flights.
Minimum2HrArrDelay <- ASample %>% filter(ArrDelay >= 120, Cancelled == 0) %>% select(Carrier,TailNum, FlightNum, OriginCityName, DestCityName, new_CRSDepTime, new_CRSArrTime, ArrDelay) %>% arrange(desc(ArrDelay))
kable(head(Minimum2HrArrDelay))
| Carrier | TailNum | FlightNum | OriginCityName | DestCityName | new_CRSDepTime | new_CRSArrTime | ArrDelay |
|---|---|---|---|---|---|---|---|
| DL | N951DL | 2242 | New York, NY | Orlando, FL | 2016-01-14 21:00:00 | 2016-01-14 00:03:00 | 1432 |
| DL | N3744F | 2501 | Las Vegas, NV | New York, NY | 2016-01-01 16:20:00 | 2016-01-01 00:07:00 | 1430 |
| WN | N483WN | 414 | Las Vegas, NV | Albuquerque, NM | 2016-01-29 21:50:00 | 2016-01-29 00:10:00 | 1430 |
| B6 | N206JB | 486 | New York, NY | Rochester, NY | 2016-01-06 22:45:00 | 2016-01-06 00:02:00 | 1429 |
| AA | N3AGAA | 234 | Miami, FL | Chicago, IL | 2016-01-17 21:29:00 | 2016-01-17 00:01:00 | 1428 |
| DL | N906DL | 2651 | Atlanta, GA | Columbia, SC | 2016-01-27 23:12:00 | 2016-01-27 00:09:00 | 1428 |
Minimum2HrArrDelay %>% ggvis(~Carrier, fill := "#F5406D", stroke := NA)
## Guessing layer_bars()
ASample %>% group_by(Carrier) %>% summarize(percent2hr = sum(DepDelay >=120/n()), count_records = n()) %>% ggvis(~Carrier, ~count_records) %>% layer_bars(fill := "#CCB359", stroke := NA)